[compiler-rt] 8e93d4c - tsan: fork runtime

Dmitry Vyukov via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 8 22:28:34 PST 2021


Author: Dmitry Vyukov
Date: 2021-12-09T07:28:26+01:00
New Revision: 8e93d4c996c0ae6605ba14f1e9d16dc45b0ab695

URL: https://github.com/llvm/llvm-project/commit/8e93d4c996c0ae6605ba14f1e9d16dc45b0ab695
DIFF: https://github.com/llvm/llvm-project/commit/8e93d4c996c0ae6605ba14f1e9d16dc45b0ab695.diff

LOG: tsan: fork runtime

Fork the current version of tsan runtime before commiting
rewrite of the runtime (D112603). The old runtime can be
enabled with TSAN_USE_OLD_RUNTIME option.
This is a temporal measure for emergencies and is required
for Chromium rollout (for context see http://crbug.com/1275581).
The old runtime is supposed to be deleted soon.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D115223

Added: 
    compiler-rt/lib/tsan/rtl-old/CMakeLists.txt
    compiler-rt/lib/tsan/rtl-old/tsan.syms.extra
    compiler-rt/lib/tsan/rtl-old/tsan_clock.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_clock.h
    compiler-rt/lib/tsan/rtl-old/tsan_debugging.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_defs.h
    compiler-rt/lib/tsan/rtl-old/tsan_dense_alloc.h
    compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h
    compiler-rt/lib/tsan/rtl-old/tsan_external.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_fd.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_fd.h
    compiler-rt/lib/tsan/rtl-old/tsan_flags.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_flags.h
    compiler-rt/lib/tsan/rtl-old/tsan_flags.inc
    compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h
    compiler-rt/lib/tsan/rtl-old/tsan_ilist.h
    compiler-rt/lib/tsan/rtl-old/tsan_interceptors.h
    compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mach_vm.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interface.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interface.h
    compiler-rt/lib/tsan/rtl-old/tsan_interface.inc
    compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h
    compiler-rt/lib/tsan/rtl-old/tsan_interface_atomic.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interface_java.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_interface_java.h
    compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_md5.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_mman.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_mman.h
    compiler-rt/lib/tsan/rtl-old/tsan_mutexset.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h
    compiler-rt/lib/tsan/rtl-old/tsan_new_delete.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_platform.h
    compiler-rt/lib/tsan/rtl-old/tsan_platform_linux.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_platform_posix.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_platform_windows.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h
    compiler-rt/lib/tsan/rtl-old/tsan_preinit.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_report.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_report.h
    compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_rtl.h
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_aarch64.S
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_amd64.S
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_mips64.S
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_mutex.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_ppc64.S
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_proc.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_report.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_s390x.S
    compiler-rt/lib/tsan/rtl-old/tsan_rtl_thread.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_shadow.h
    compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h
    compiler-rt/lib/tsan/rtl-old/tsan_suppressions.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h
    compiler-rt/lib/tsan/rtl-old/tsan_symbolize.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h
    compiler-rt/lib/tsan/rtl-old/tsan_sync.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_sync.h
    compiler-rt/lib/tsan/rtl-old/tsan_trace.h
    compiler-rt/lib/tsan/rtl-old/tsan_update_shadow_word.inc
    compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.cpp
    compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h

Modified: 
    compiler-rt/lib/tsan/CMakeLists.txt

Removed: 
    


################################################################################
diff  --git a/compiler-rt/lib/tsan/CMakeLists.txt b/compiler-rt/lib/tsan/CMakeLists.txt
index c5ec6b0ddfd22..a327fb14de9a5 100644
--- a/compiler-rt/lib/tsan/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/CMakeLists.txt
@@ -16,7 +16,12 @@ if(COMPILER_RT_TSAN_DEBUG_OUTPUT)
 endif()
 
 # Add the actual runtime library.
-add_subdirectory(rtl)
+option(TSAN_USE_OLD_RUNTIME "Use the old tsan runtime (temporal option for emergencies)." OFF)
+if (TSAN_USE_OLD_RUNTIME)
+  add_subdirectory(rtl-old)
+else()
+  add_subdirectory(rtl)
+endif()
 
 # Build libcxx instrumented with TSan.
 if(COMPILER_RT_LIBCXX_PATH AND

diff  --git a/compiler-rt/lib/tsan/rtl-old/CMakeLists.txt b/compiler-rt/lib/tsan/rtl-old/CMakeLists.txt
new file mode 100644
index 0000000000000..061f6c33690be
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/CMakeLists.txt
@@ -0,0 +1,285 @@
+include_directories(../..)
+
+set(TSAN_RTL_CFLAGS ${TSAN_CFLAGS})
+append_list_if(COMPILER_RT_HAS_MSSE4_2_FLAG -msse4.2 TSAN_RTL_CFLAGS)
+append_list_if(SANITIZER_LIMIT_FRAME_SIZE -Wframe-larger-than=530
+               TSAN_RTL_CFLAGS)
+append_list_if(COMPILER_RT_HAS_WGLOBAL_CONSTRUCTORS_FLAG -Wglobal-constructors
+               TSAN_RTL_CFLAGS)
+
+set(TSAN_RTL_DYNAMIC_CFLAGS ${TSAN_RTL_CFLAGS})
+list(REMOVE_ITEM TSAN_RTL_DYNAMIC_CFLAGS -fPIE)
+
+set(TSAN_DYNAMIC_LINK_LIBS ${SANITIZER_CXX_ABI_LIBRARIES} ${SANITIZER_COMMON_LINK_LIBS})
+
+append_list_if(COMPILER_RT_HAS_LIBDL dl TSAN_DYNAMIC_LINK_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBM m TSAN_DYNAMIC_LINK_LIBS)
+append_list_if(COMPILER_RT_HAS_LIBPTHREAD pthread TSAN_DYNAMIC_LINK_LIBS)
+
+set(TSAN_SOURCES
+  tsan_clock.cpp
+  tsan_debugging.cpp
+  tsan_external.cpp
+  tsan_fd.cpp
+  tsan_flags.cpp
+  tsan_ignoreset.cpp
+  tsan_interceptors_posix.cpp
+  tsan_interface.cpp
+  tsan_interface_ann.cpp
+  tsan_interface_atomic.cpp
+  tsan_interface_java.cpp
+  tsan_malloc_mac.cpp
+  tsan_md5.cpp
+  tsan_mman.cpp
+  tsan_mutexset.cpp
+  tsan_report.cpp
+  tsan_rtl.cpp
+  tsan_rtl_access.cpp
+  tsan_rtl_mutex.cpp
+  tsan_rtl_proc.cpp
+  tsan_rtl_report.cpp
+  tsan_rtl_thread.cpp
+  tsan_stack_trace.cpp
+  tsan_suppressions.cpp
+  tsan_symbolize.cpp
+  tsan_sync.cpp
+  tsan_vector_clock.cpp
+  )
+
+set(TSAN_CXX_SOURCES
+  tsan_new_delete.cpp
+  )
+
+set(TSAN_PREINIT_SOURCES
+  tsan_preinit.cpp
+  )
+
+if(APPLE)
+  list(APPEND TSAN_SOURCES
+    tsan_interceptors_mac.cpp
+    tsan_interceptors_mach_vm.cpp
+    tsan_platform_mac.cpp
+    tsan_platform_posix.cpp
+    )
+elseif(UNIX)
+  # Assume Linux
+  list(APPEND TSAN_SOURCES
+    tsan_platform_linux.cpp
+    tsan_platform_posix.cpp
+    )
+endif()
+
+if(COMPILER_RT_INTERCEPT_LIBDISPATCH)
+  list(APPEND TSAN_SOURCES
+    tsan_interceptors_libdispatch.cpp
+    )
+  list(APPEND TSAN_RTL_CFLAGS ${COMPILER_RT_LIBDISPATCH_CFLAGS})
+endif()
+
+set(TSAN_HEADERS
+  tsan_clock.h
+  tsan_defs.h
+  tsan_dense_alloc.h
+  tsan_fd.h
+  tsan_flags.h
+  tsan_flags.inc
+  tsan_ignoreset.h
+  tsan_ilist.h
+  tsan_interceptors.h
+  tsan_interface.h
+  tsan_interface.inc
+  tsan_interface_ann.h
+  tsan_interface_java.h
+  tsan_mman.h
+  tsan_mutexset.h
+  tsan_platform.h
+  tsan_ppc_regs.h
+  tsan_report.h
+  tsan_rtl.h
+  tsan_shadow.h
+  tsan_stack_trace.h
+  tsan_suppressions.h
+  tsan_symbolize.h
+  tsan_sync.h
+  tsan_trace.h
+  tsan_update_shadow_word.inc
+  tsan_vector_clock.h
+  )
+
+set(TSAN_RUNTIME_LIBRARIES)
+add_compiler_rt_component(tsan)
+
+if("${CMAKE_C_FLAGS}" MATCHES "-Wno-(error=)?unused-command-line-argument")
+  set(EXTRA_CFLAGS "-Wno-error=unused-command-line-argument ${EXTRA_CFLAGS}")
+endif()
+
+if(APPLE)
+  # Ideally we would check the SDK version for the actual platform we are
+  # building for here.  To make our lifes easier we assume the host SDK setup is
+  # sane and use the macOS SDK version as a proxy for aligned SDKs.
+  find_darwin_sdk_version(macosx_sdk_version "macosx")
+  if ("${macosx_sdk_version}" VERSION_LESS 10.12)
+    message(FATAL_ERROR "Building the TSan runtime requires at least macOS SDK 10.12 (or aligned SDK on other platforms)")
+  endif()
+
+  add_asm_sources(TSAN_ASM_SOURCES
+    tsan_rtl_amd64.S
+    tsan_rtl_aarch64.S
+    )
+
+  set(TSAN_LINK_LIBS ${SANITIZER_COMMON_LINK_LIBS})
+
+  add_weak_symbols("ubsan" WEAK_SYMBOL_LINK_FLAGS)
+  add_weak_symbols("sanitizer_common" WEAK_SYMBOL_LINK_FLAGS)
+
+  add_compiler_rt_runtime(clang_rt.tsan
+    SHARED
+    OS ${TSAN_SUPPORTED_OS}
+    ARCHS ${TSAN_SUPPORTED_ARCH}
+    SOURCES ${TSAN_SOURCES} ${TSAN_CXX_SOURCES} ${TSAN_ASM_SOURCES}
+    ADDITIONAL_HEADERS ${TSAN_HEADERS}
+    OBJECT_LIBS RTInterception
+                RTSanitizerCommon
+                RTSanitizerCommonLibc
+                RTSanitizerCommonCoverage
+                RTSanitizerCommonSymbolizer
+                RTUbsan
+    CFLAGS ${TSAN_RTL_CFLAGS}
+    LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} ${WEAK_SYMBOL_LINK_FLAGS}
+    LINK_LIBS ${TSAN_LINK_LIBS} objc
+    PARENT_TARGET tsan)
+  add_compiler_rt_object_libraries(RTTsan_dynamic
+    OS ${TSAN_SUPPORTED_OS}
+    ARCHS ${TSAN_SUPPORTED_ARCH}
+    SOURCES ${TSAN_SOURCES} ${TSAN_CXX_SOURCES} ${TSAN_ASM_SOURCES}
+    ADDITIONAL_HEADERS ${TSAN_HEADERS}
+    CFLAGS ${TSAN_RTL_CFLAGS})
+
+  # Build and check Go runtime.
+  set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../go/buildgo.sh)
+  add_custom_target(GotsanRuntimeCheck
+    COMMAND env "CC=${CMAKE_C_COMPILER} ${OSX_SYSROOT_FLAG}"
+            EXTRA_CFLAGS=${EXTRA_CFLAGS}
+            IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT}
+    DEPENDS tsan ${BUILDGO_SCRIPT}
+    WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go
+    COMMENT "Checking TSan Go runtime..."
+    VERBATIM)
+  set_target_properties(GotsanRuntimeCheck PROPERTIES FOLDER "Compiler-RT Misc")
+else()
+  foreach(arch ${TSAN_SUPPORTED_ARCH})
+    if(arch STREQUAL "x86_64")
+      add_asm_sources(TSAN_ASM_SOURCES
+        tsan_rtl_amd64.S
+        )
+      # Sanity check for Go runtime.
+      set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../go/buildgo.sh)
+      add_custom_target(GotsanRuntimeCheck
+        COMMAND env "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"
+                EXTRA_CFLAGS=${EXTRA_CFLAGS}
+                IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT}
+        DEPENDS clang_rt.tsan-${arch} ${BUILDGO_SCRIPT}
+        WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go
+        COMMENT "Checking TSan Go runtime..."
+        VERBATIM)
+    elseif(arch STREQUAL "aarch64")
+      add_asm_sources(TSAN_ASM_SOURCES
+        tsan_rtl_aarch64.S
+        )
+      # Sanity check for Go runtime.
+      set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../go/buildgo.sh)
+      add_custom_target(GotsanRuntimeCheck
+	COMMAND env "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"
+		EXTRA_CFLAGS=${EXTRA_CFLAGS}
+		IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT}
+	DEPENDS clang_rt.tsan-${arch} ${BUILDGO_SCRIPT}
+	WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go
+	COMMENT "Checking TSan Go runtime..."
+	VERBATIM)
+    elseif(arch MATCHES "powerpc64|powerpc64le")
+      add_asm_sources(TSAN_ASM_SOURCES
+        tsan_rtl_ppc64.S
+        )
+      # Sanity check for Go runtime.
+      set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../go/buildgo.sh)
+      add_custom_target(GotsanRuntimeCheck
+	COMMAND env "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"
+		EXTRA_CFLAGS=${EXTRA_CFLAGS}
+		IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT}
+	DEPENDS clang_rt.tsan-${arch} ${BUILDGO_SCRIPT}
+	WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go
+	COMMENT "Checking TSan Go runtime..."
+	VERBATIM)
+    elseif(arch MATCHES "mips64|mips64le")
+      add_asm_sources(TSAN_ASM_SOURCES
+        tsan_rtl_mips64.S
+        )
+    elseif(arch MATCHES "s390x")
+      add_asm_sources(TSAN_ASM_SOURCES
+        tsan_rtl_s390x.S
+        )
+      # Sanity check for Go runtime.
+      set(BUILDGO_SCRIPT ${CMAKE_CURRENT_SOURCE_DIR}/../go/buildgo.sh)
+      add_custom_target(GotsanRuntimeCheck
+	COMMAND env "CC=${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}"
+		EXTRA_CFLAGS=${EXTRA_CFLAGS}
+		IN_TMPDIR=1 SILENT=1 ${BUILDGO_SCRIPT}
+	DEPENDS clang_rt.tsan-${arch} ${BUILDGO_SCRIPT}
+	WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../go
+	COMMENT "Checking TSan Go runtime..."
+	VERBATIM)
+    else()
+      set(TSAN_ASM_SOURCES)
+    endif()
+    add_compiler_rt_runtime(clang_rt.tsan
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${TSAN_SOURCES} ${TSAN_ASM_SOURCES} ${TSAN_PREINIT_SOURCES}
+              $<TARGET_OBJECTS:RTInterception.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
+              $<TARGET_OBJECTS:RTUbsan.${arch}>
+      ADDITIONAL_HEADERS ${TSAN_HEADERS}
+      CFLAGS ${TSAN_RTL_CFLAGS}
+      PARENT_TARGET tsan)
+    add_compiler_rt_runtime(clang_rt.tsan_cxx
+      STATIC
+      ARCHS ${arch}
+      SOURCES ${TSAN_CXX_SOURCES}
+              $<TARGET_OBJECTS:RTUbsan_cxx.${arch}>
+      ADDITIONAL_HEADERS ${TSAN_HEADERS}
+      CFLAGS ${TSAN_RTL_CFLAGS}
+      PARENT_TARGET tsan)
+    list(APPEND TSAN_RUNTIME_LIBRARIES clang_rt.tsan-${arch}
+                                       clang_rt.tsan_cxx-${arch})
+    add_compiler_rt_runtime(clang_rt.tsan
+      SHARED
+      ARCHS ${arch}
+      SOURCES ${TSAN_SOURCES} ${TSAN_ASM_SOURCES}
+              $<TARGET_OBJECTS:RTInterception.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommon.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonLibc.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonCoverage.${arch}>
+              $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
+              $<TARGET_OBJECTS:RTUbsan.${arch}>
+      ADDITIONAL_HEADERS ${TSAN_HEADERS}
+      CFLAGS ${TSAN_RTL_DYNAMIC_CFLAGS}
+      LINK_LIBS ${TSAN_DYNAMIC_LINK_LIBS}
+      LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS}
+      PARENT_TARGET tsan)
+    add_sanitizer_rt_symbols(clang_rt.tsan
+      ARCHS ${arch}
+      EXTRA tsan.syms.extra)
+    add_sanitizer_rt_symbols(clang_rt.tsan_cxx
+      ARCHS ${arch}
+      EXTRA tsan.syms.extra)
+    add_dependencies(tsan clang_rt.tsan-${arch}
+                          clang_rt.tsan_cxx-${arch}
+                          clang_rt.tsan-${arch}-symbols
+                          clang_rt.tsan_cxx-${arch}-symbols)
+  endforeach()
+endif()
+
+

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan.syms.extra b/compiler-rt/lib/tsan/rtl-old/tsan.syms.extra
new file mode 100644
index 0000000000000..4838bb0a72792
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan.syms.extra
@@ -0,0 +1,31 @@
+__tsan_init
+__tsan_flush_memory
+__tsan_read*
+__tsan_write*
+__tsan_vptr*
+__tsan_func*
+__tsan_atomic*
+__tsan_java*
+__tsan_unaligned*
+__tsan_release
+__tsan_acquire
+__tsan_mutex_create
+__tsan_mutex_destroy
+__tsan_mutex_pre_lock
+__tsan_mutex_post_lock
+__tsan_mutex_pre_unlock
+__tsan_mutex_post_unlock
+__tsan_mutex_pre_signal
+__tsan_mutex_post_signal
+__tsan_mutex_pre_divert
+__tsan_mutex_post_divert
+__tsan_get_current_fiber
+__tsan_create_fiber
+__tsan_destroy_fiber
+__tsan_switch_to_fiber
+__tsan_set_fiber_name
+__ubsan_*
+Annotate*
+WTFAnnotate*
+RunningOnValgrind
+ValgrindSlowdown

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_clock.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_clock.cpp
new file mode 100644
index 0000000000000..d122b67c0aaa5
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_clock.cpp
@@ -0,0 +1,625 @@
+//===-- tsan_clock.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_clock.h"
+#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+
+// SyncClock and ThreadClock implement vector clocks for sync variables
+// (mutexes, atomic variables, file descriptors, etc) and threads, respectively.
+// ThreadClock contains fixed-size vector clock for maximum number of threads.
+// SyncClock contains growable vector clock for currently necessary number of
+// threads.
+// Together they implement very simple model of operations, namely:
+//
+//   void ThreadClock::acquire(const SyncClock *src) {
+//     for (int i = 0; i < kMaxThreads; i++)
+//       clock[i] = max(clock[i], src->clock[i]);
+//   }
+//
+//   void ThreadClock::release(SyncClock *dst) const {
+//     for (int i = 0; i < kMaxThreads; i++)
+//       dst->clock[i] = max(dst->clock[i], clock[i]);
+//   }
+//
+//   void ThreadClock::releaseStoreAcquire(SyncClock *sc) const {
+//     for (int i = 0; i < kMaxThreads; i++) {
+//       tmp = clock[i];
+//       clock[i] = max(clock[i], sc->clock[i]);
+//       sc->clock[i] = tmp;
+//     }
+//   }
+//
+//   void ThreadClock::ReleaseStore(SyncClock *dst) const {
+//     for (int i = 0; i < kMaxThreads; i++)
+//       dst->clock[i] = clock[i];
+//   }
+//
+//   void ThreadClock::acq_rel(SyncClock *dst) {
+//     acquire(dst);
+//     release(dst);
+//   }
+//
+// Conformance to this model is extensively verified in tsan_clock_test.cpp.
+// However, the implementation is significantly more complex. The complexity
+// allows to implement important classes of use cases in O(1) instead of O(N).
+//
+// The use cases are:
+// 1. Singleton/once atomic that has a single release-store operation followed
+//    by zillions of acquire-loads (the acquire-load is O(1)).
+// 2. Thread-local mutex (both lock and unlock can be O(1)).
+// 3. Leaf mutex (unlock is O(1)).
+// 4. A mutex shared by 2 threads (both lock and unlock can be O(1)).
+// 5. An atomic with a single writer (writes can be O(1)).
+// The implementation dynamically adopts to workload. So if an atomic is in
+// read-only phase, these reads will be O(1); if it later switches to read/write
+// phase, the implementation will correctly handle that by switching to O(N).
+//
+// Thread-safety note: all const operations on SyncClock's are conducted under
+// a shared lock; all non-const operations on SyncClock's are conducted under
+// an exclusive lock; ThreadClock's are private to respective threads and so
+// do not need any protection.
+//
+// Description of SyncClock state:
+// clk_ - variable size vector clock, low kClkBits hold timestamp,
+//   the remaining bits hold "acquired" flag (the actual value is thread's
+//   reused counter);
+//   if acquired == thr->reused_, then the respective thread has already
+//   acquired this clock (except possibly for dirty elements).
+// dirty_ - holds up to two indices in the vector clock that other threads
+//   need to acquire regardless of "acquired" flag value;
+// release_store_tid_ - denotes that the clock state is a result of
+//   release-store operation by the thread with release_store_tid_ index.
+// release_store_reused_ - reuse count of release_store_tid_.
+
+namespace __tsan {
+
+static atomic_uint32_t *ref_ptr(ClockBlock *cb) {
+  return reinterpret_cast<atomic_uint32_t *>(&cb->table[ClockBlock::kRefIdx]);
+}
+
+// Drop reference to the first level block idx.
+static void UnrefClockBlock(ClockCache *c, u32 idx, uptr blocks) {
+  ClockBlock *cb = ctx->clock_alloc.Map(idx);
+  atomic_uint32_t *ref = ref_ptr(cb);
+  u32 v = atomic_load(ref, memory_order_acquire);
+  for (;;) {
+    CHECK_GT(v, 0);
+    if (v == 1)
+      break;
+    if (atomic_compare_exchange_strong(ref, &v, v - 1, memory_order_acq_rel))
+      return;
+  }
+  // First level block owns second level blocks, so them as well.
+  for (uptr i = 0; i < blocks; i++)
+    ctx->clock_alloc.Free(c, cb->table[ClockBlock::kBlockIdx - i]);
+  ctx->clock_alloc.Free(c, idx);
+}
+
+ThreadClock::ThreadClock(unsigned tid, unsigned reused)
+    : tid_(tid)
+    , reused_(reused + 1)  // 0 has special meaning
+    , last_acquire_()
+    , global_acquire_()
+    , cached_idx_()
+    , cached_size_()
+    , cached_blocks_() {
+  CHECK_LT(tid, kMaxTidInClock);
+  CHECK_EQ(reused_, ((u64)reused_ << kClkBits) >> kClkBits);
+  nclk_ = tid_ + 1;
+  internal_memset(clk_, 0, sizeof(clk_));
+}
+
+void ThreadClock::ResetCached(ClockCache *c) {
+  if (cached_idx_) {
+    UnrefClockBlock(c, cached_idx_, cached_blocks_);
+    cached_idx_ = 0;
+    cached_size_ = 0;
+    cached_blocks_ = 0;
+  }
+}
+
+void ThreadClock::acquire(ClockCache *c, SyncClock *src) {
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(src->size_, kMaxTid);
+
+  // Check if it's empty -> no need to do anything.
+  const uptr nclk = src->size_;
+  if (nclk == 0)
+    return;
+
+  bool acquired = false;
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    SyncClock::Dirty dirty = src->dirty_[i];
+    unsigned tid = dirty.tid();
+    if (tid != kInvalidTid) {
+      if (clk_[tid] < dirty.epoch) {
+        clk_[tid] = dirty.epoch;
+        acquired = true;
+      }
+    }
+  }
+
+  // Check if we've already acquired src after the last release operation on src
+  if (tid_ >= nclk || src->elem(tid_).reused != reused_) {
+    // O(N) acquire.
+    nclk_ = max(nclk_, nclk);
+    u64 *dst_pos = &clk_[0];
+    for (ClockElem &src_elem : *src) {
+      u64 epoch = src_elem.epoch;
+      if (*dst_pos < epoch) {
+        *dst_pos = epoch;
+        acquired = true;
+      }
+      dst_pos++;
+    }
+
+    // Remember that this thread has acquired this clock.
+    if (nclk > tid_)
+      src->elem(tid_).reused = reused_;
+  }
+
+  if (acquired) {
+    last_acquire_ = clk_[tid_];
+    ResetCached(c);
+  }
+}
+
+void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) {
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(sc->size_, kMaxTid);
+
+  if (sc->size_ == 0) {
+    // ReleaseStore will correctly set release_store_tid_,
+    // which can be important for future operations.
+    ReleaseStore(c, sc);
+    return;
+  }
+
+  nclk_ = max(nclk_, (uptr) sc->size_);
+
+  // Check if we need to resize sc.
+  if (sc->size_ < nclk_)
+    sc->Resize(c, nclk_);
+
+  bool acquired = false;
+
+  sc->Unshare(c);
+  // Update sc->clk_.
+  sc->FlushDirty();
+  uptr i = 0;
+  for (ClockElem &ce : *sc) {
+    u64 tmp = clk_[i];
+    if (clk_[i] < ce.epoch) {
+      clk_[i] = ce.epoch;
+      acquired = true;
+    }
+    ce.epoch = tmp;
+    ce.reused = 0;
+    i++;
+  }
+  sc->release_store_tid_ = kInvalidTid;
+  sc->release_store_reused_ = 0;
+
+  if (acquired) {
+    last_acquire_ = clk_[tid_];
+    ResetCached(c);
+  }
+}
+
+void ThreadClock::release(ClockCache *c, SyncClock *dst) {
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(dst->size_, kMaxTid);
+
+  if (dst->size_ == 0) {
+    // ReleaseStore will correctly set release_store_tid_,
+    // which can be important for future operations.
+    ReleaseStore(c, dst);
+    return;
+  }
+
+  // Check if we need to resize dst.
+  if (dst->size_ < nclk_)
+    dst->Resize(c, nclk_);
+
+  // Check if we had not acquired anything from other threads
+  // since the last release on dst. If so, we need to update
+  // only dst->elem(tid_).
+  if (!HasAcquiredAfterRelease(dst)) {
+    UpdateCurrentThread(c, dst);
+    if (dst->release_store_tid_ != tid_ ||
+        dst->release_store_reused_ != reused_)
+      dst->release_store_tid_ = kInvalidTid;
+    return;
+  }
+
+  // O(N) release.
+  dst->Unshare(c);
+  // First, remember whether we've acquired dst.
+  bool acquired = IsAlreadyAcquired(dst);
+  // Update dst->clk_.
+  dst->FlushDirty();
+  uptr i = 0;
+  for (ClockElem &ce : *dst) {
+    ce.epoch = max(ce.epoch, clk_[i]);
+    ce.reused = 0;
+    i++;
+  }
+  // Clear 'acquired' flag in the remaining elements.
+  dst->release_store_tid_ = kInvalidTid;
+  dst->release_store_reused_ = 0;
+  // If we've acquired dst, remember this fact,
+  // so that we don't need to acquire it on next acquire.
+  if (acquired)
+    dst->elem(tid_).reused = reused_;
+}
+
+void ThreadClock::ReleaseStore(ClockCache *c, SyncClock *dst) {
+  DCHECK_LE(nclk_, kMaxTid);
+  DCHECK_LE(dst->size_, kMaxTid);
+
+  if (dst->size_ == 0 && cached_idx_ != 0) {
+    // Reuse the cached clock.
+    // Note: we could reuse/cache the cached clock in more cases:
+    // we could update the existing clock and cache it, or replace it with the
+    // currently cached clock and release the old one. And for a shared
+    // existing clock, we could replace it with the currently cached;
+    // or unshare, update and cache. But, for simplicity, we currently reuse
+    // cached clock only when the target clock is empty.
+    dst->tab_ = ctx->clock_alloc.Map(cached_idx_);
+    dst->tab_idx_ = cached_idx_;
+    dst->size_ = cached_size_;
+    dst->blocks_ = cached_blocks_;
+    CHECK_EQ(dst->dirty_[0].tid(), kInvalidTid);
+    // The cached clock is shared (immutable),
+    // so this is where we store the current clock.
+    dst->dirty_[0].set_tid(tid_);
+    dst->dirty_[0].epoch = clk_[tid_];
+    dst->release_store_tid_ = tid_;
+    dst->release_store_reused_ = reused_;
+    // Remember that we don't need to acquire it in future.
+    dst->elem(tid_).reused = reused_;
+    // Grab a reference.
+    atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+    return;
+  }
+
+  // Check if we need to resize dst.
+  if (dst->size_ < nclk_)
+    dst->Resize(c, nclk_);
+
+  if (dst->release_store_tid_ == tid_ &&
+      dst->release_store_reused_ == reused_ &&
+      !HasAcquiredAfterRelease(dst)) {
+    UpdateCurrentThread(c, dst);
+    return;
+  }
+
+  // O(N) release-store.
+  dst->Unshare(c);
+  // Note: dst can be larger than this ThreadClock.
+  // This is fine since clk_ beyond size is all zeros.
+  uptr i = 0;
+  for (ClockElem &ce : *dst) {
+    ce.epoch = clk_[i];
+    ce.reused = 0;
+    i++;
+  }
+  for (uptr i = 0; i < kDirtyTids; i++) dst->dirty_[i].set_tid(kInvalidTid);
+  dst->release_store_tid_ = tid_;
+  dst->release_store_reused_ = reused_;
+  // Remember that we don't need to acquire it in future.
+  dst->elem(tid_).reused = reused_;
+
+  // If the resulting clock is cachable, cache it for future release operations.
+  // The clock is always cachable if we released to an empty sync object.
+  if (cached_idx_ == 0 && dst->Cachable()) {
+    // Grab a reference to the ClockBlock.
+    atomic_uint32_t *ref = ref_ptr(dst->tab_);
+    if (atomic_load(ref, memory_order_acquire) == 1)
+      atomic_store_relaxed(ref, 2);
+    else
+      atomic_fetch_add(ref_ptr(dst->tab_), 1, memory_order_relaxed);
+    cached_idx_ = dst->tab_idx_;
+    cached_size_ = dst->size_;
+    cached_blocks_ = dst->blocks_;
+  }
+}
+
+void ThreadClock::acq_rel(ClockCache *c, SyncClock *dst) {
+  acquire(c, dst);
+  ReleaseStore(c, dst);
+}
+
+// Updates only single element related to the current thread in dst->clk_.
+void ThreadClock::UpdateCurrentThread(ClockCache *c, SyncClock *dst) const {
+  // Update the threads time, but preserve 'acquired' flag.
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    SyncClock::Dirty *dirty = &dst->dirty_[i];
+    const unsigned tid = dirty->tid();
+    if (tid == tid_ || tid == kInvalidTid) {
+      dirty->set_tid(tid_);
+      dirty->epoch = clk_[tid_];
+      return;
+    }
+  }
+  // Reset all 'acquired' flags, O(N).
+  // We are going to touch dst elements, so we need to unshare it.
+  dst->Unshare(c);
+  dst->elem(tid_).epoch = clk_[tid_];
+  for (uptr i = 0; i < dst->size_; i++)
+    dst->elem(i).reused = 0;
+  dst->FlushDirty();
+}
+
+// Checks whether the current thread has already acquired src.
+bool ThreadClock::IsAlreadyAcquired(const SyncClock *src) const {
+  if (src->elem(tid_).reused != reused_)
+    return false;
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    SyncClock::Dirty dirty = src->dirty_[i];
+    if (dirty.tid() != kInvalidTid) {
+      if (clk_[dirty.tid()] < dirty.epoch)
+        return false;
+    }
+  }
+  return true;
+}
+
+// Checks whether the current thread has acquired anything
+// from other clocks after releasing to dst (directly or indirectly).
+bool ThreadClock::HasAcquiredAfterRelease(const SyncClock *dst) const {
+  const u64 my_epoch = dst->elem(tid_).epoch;
+  return my_epoch <= last_acquire_ ||
+      my_epoch <= atomic_load_relaxed(&global_acquire_);
+}
+
+// Sets a single element in the vector clock.
+// This function is called only from weird places like AcquireGlobal.
+void ThreadClock::set(ClockCache *c, unsigned tid, u64 v) {
+  DCHECK_LT(tid, kMaxTid);
+  DCHECK_GE(v, clk_[tid]);
+  clk_[tid] = v;
+  if (nclk_ <= tid)
+    nclk_ = tid + 1;
+  last_acquire_ = clk_[tid_];
+  ResetCached(c);
+}
+
+void ThreadClock::DebugDump(int(*printf)(const char *s, ...)) {
+  printf("clock=[");
+  for (uptr i = 0; i < nclk_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", clk_[i]);
+  printf("] tid=%u/%u last_acq=%llu", tid_, reused_, last_acquire_);
+}
+
+SyncClock::SyncClock() {
+  ResetImpl();
+}
+
+SyncClock::~SyncClock() {
+  // Reset must be called before dtor.
+  CHECK_EQ(size_, 0);
+  CHECK_EQ(blocks_, 0);
+  CHECK_EQ(tab_, 0);
+  CHECK_EQ(tab_idx_, 0);
+}
+
+void SyncClock::Reset(ClockCache *c) {
+  if (size_)
+    UnrefClockBlock(c, tab_idx_, blocks_);
+  ResetImpl();
+}
+
+void SyncClock::ResetImpl() {
+  tab_ = 0;
+  tab_idx_ = 0;
+  size_ = 0;
+  blocks_ = 0;
+  release_store_tid_ = kInvalidTid;
+  release_store_reused_ = 0;
+  for (uptr i = 0; i < kDirtyTids; i++) dirty_[i].set_tid(kInvalidTid);
+}
+
+void SyncClock::Resize(ClockCache *c, uptr nclk) {
+  Unshare(c);
+  if (nclk <= capacity()) {
+    // Memory is already allocated, just increase the size.
+    size_ = nclk;
+    return;
+  }
+  if (size_ == 0) {
+    // Grow from 0 to one-level table.
+    CHECK_EQ(size_, 0);
+    CHECK_EQ(blocks_, 0);
+    CHECK_EQ(tab_, 0);
+    CHECK_EQ(tab_idx_, 0);
+    tab_idx_ = ctx->clock_alloc.Alloc(c);
+    tab_ = ctx->clock_alloc.Map(tab_idx_);
+    internal_memset(tab_, 0, sizeof(*tab_));
+    atomic_store_relaxed(ref_ptr(tab_), 1);
+    size_ = 1;
+  } else if (size_ > blocks_ * ClockBlock::kClockCount) {
+    u32 idx = ctx->clock_alloc.Alloc(c);
+    ClockBlock *new_cb = ctx->clock_alloc.Map(idx);
+    uptr top = size_ - blocks_ * ClockBlock::kClockCount;
+    CHECK_LT(top, ClockBlock::kClockCount);
+    const uptr move = top * sizeof(tab_->clock[0]);
+    internal_memcpy(&new_cb->clock[0], tab_->clock, move);
+    internal_memset(&new_cb->clock[top], 0, sizeof(*new_cb) - move);
+    internal_memset(tab_->clock, 0, move);
+    append_block(idx);
+  }
+  // At this point we have first level table allocated and all clock elements
+  // are evacuated from it to a second level block.
+  // Add second level tables as necessary.
+  while (nclk > capacity()) {
+    u32 idx = ctx->clock_alloc.Alloc(c);
+    ClockBlock *cb = ctx->clock_alloc.Map(idx);
+    internal_memset(cb, 0, sizeof(*cb));
+    append_block(idx);
+  }
+  size_ = nclk;
+}
+
+// Flushes all dirty elements into the main clock array.
+void SyncClock::FlushDirty() {
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    Dirty *dirty = &dirty_[i];
+    if (dirty->tid() != kInvalidTid) {
+      CHECK_LT(dirty->tid(), size_);
+      elem(dirty->tid()).epoch = dirty->epoch;
+      dirty->set_tid(kInvalidTid);
+    }
+  }
+}
+
+bool SyncClock::IsShared() const {
+  if (size_ == 0)
+    return false;
+  atomic_uint32_t *ref = ref_ptr(tab_);
+  u32 v = atomic_load(ref, memory_order_acquire);
+  CHECK_GT(v, 0);
+  return v > 1;
+}
+
+// Unshares the current clock if it's shared.
+// Shared clocks are immutable, so they need to be unshared before any updates.
+// Note: this does not apply to dirty entries as they are not shared.
+void SyncClock::Unshare(ClockCache *c) {
+  if (!IsShared())
+    return;
+  // First, copy current state into old.
+  SyncClock old;
+  old.tab_ = tab_;
+  old.tab_idx_ = tab_idx_;
+  old.size_ = size_;
+  old.blocks_ = blocks_;
+  old.release_store_tid_ = release_store_tid_;
+  old.release_store_reused_ = release_store_reused_;
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    old.dirty_[i] = dirty_[i];
+  // Then, clear current object.
+  ResetImpl();
+  // Allocate brand new clock in the current object.
+  Resize(c, old.size_);
+  // Now copy state back into this object.
+  Iter old_iter(&old);
+  for (ClockElem &ce : *this) {
+    ce = *old_iter;
+    ++old_iter;
+  }
+  release_store_tid_ = old.release_store_tid_;
+  release_store_reused_ = old.release_store_reused_;
+  for (unsigned i = 0; i < kDirtyTids; i++)
+    dirty_[i] = old.dirty_[i];
+  // Drop reference to old and delete if necessary.
+  old.Reset(c);
+}
+
+// Can we cache this clock for future release operations?
+ALWAYS_INLINE bool SyncClock::Cachable() const {
+  if (size_ == 0)
+    return false;
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    if (dirty_[i].tid() != kInvalidTid)
+      return false;
+  }
+  return atomic_load_relaxed(ref_ptr(tab_)) == 1;
+}
+
+// elem linearizes the two-level structure into linear array.
+// Note: this is used only for one time accesses, vector operations use
+// the iterator as it is much faster.
+ALWAYS_INLINE ClockElem &SyncClock::elem(unsigned tid) const {
+  DCHECK_LT(tid, size_);
+  const uptr block = tid / ClockBlock::kClockCount;
+  DCHECK_LE(block, blocks_);
+  tid %= ClockBlock::kClockCount;
+  if (block == blocks_)
+    return tab_->clock[tid];
+  u32 idx = get_block(block);
+  ClockBlock *cb = ctx->clock_alloc.Map(idx);
+  return cb->clock[tid];
+}
+
+ALWAYS_INLINE uptr SyncClock::capacity() const {
+  if (size_ == 0)
+    return 0;
+  uptr ratio = sizeof(ClockBlock::clock[0]) / sizeof(ClockBlock::table[0]);
+  // How many clock elements we can fit into the first level block.
+  // +1 for ref counter.
+  uptr top = ClockBlock::kClockCount - RoundUpTo(blocks_ + 1, ratio) / ratio;
+  return blocks_ * ClockBlock::kClockCount + top;
+}
+
+ALWAYS_INLINE u32 SyncClock::get_block(uptr bi) const {
+  DCHECK(size_);
+  DCHECK_LT(bi, blocks_);
+  return tab_->table[ClockBlock::kBlockIdx - bi];
+}
+
+ALWAYS_INLINE void SyncClock::append_block(u32 idx) {
+  uptr bi = blocks_++;
+  CHECK_EQ(get_block(bi), 0);
+  tab_->table[ClockBlock::kBlockIdx - bi] = idx;
+}
+
+// Used only by tests.
+u64 SyncClock::get(unsigned tid) const {
+  for (unsigned i = 0; i < kDirtyTids; i++) {
+    Dirty dirty = dirty_[i];
+    if (dirty.tid() == tid)
+      return dirty.epoch;
+  }
+  return elem(tid).epoch;
+}
+
+// Used only by Iter test.
+u64 SyncClock::get_clean(unsigned tid) const {
+  return elem(tid).epoch;
+}
+
+void SyncClock::DebugDump(int(*printf)(const char *s, ...)) {
+  printf("clock=[");
+  for (uptr i = 0; i < size_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", elem(i).epoch);
+  printf("] reused=[");
+  for (uptr i = 0; i < size_; i++)
+    printf("%s%llu", i == 0 ? "" : ",", elem(i).reused);
+  printf("] release_store_tid=%d/%d dirty_tids=%d[%llu]/%d[%llu]",
+         release_store_tid_, release_store_reused_, dirty_[0].tid(),
+         dirty_[0].epoch, dirty_[1].tid(), dirty_[1].epoch);
+}
+
+void SyncClock::Iter::Next() {
+  // Finished with the current block, move on to the next one.
+  block_++;
+  if (block_ < parent_->blocks_) {
+    // Iterate over the next second level block.
+    u32 idx = parent_->get_block(block_);
+    ClockBlock *cb = ctx->clock_alloc.Map(idx);
+    pos_ = &cb->clock[0];
+    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+        ClockBlock::kClockCount);
+    return;
+  }
+  if (block_ == parent_->blocks_ &&
+      parent_->size_ > parent_->blocks_ * ClockBlock::kClockCount) {
+    // Iterate over elements in the first level block.
+    pos_ = &parent_->tab_->clock[0];
+    end_ = pos_ + min(parent_->size_ - block_ * ClockBlock::kClockCount,
+        ClockBlock::kClockCount);
+    return;
+  }
+  parent_ = nullptr;  // denotes end
+}
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_clock.h b/compiler-rt/lib/tsan/rtl-old/tsan_clock.h
new file mode 100644
index 0000000000000..11cbc0c0b86b6
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_clock.h
@@ -0,0 +1,293 @@
+//===-- tsan_clock.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_CLOCK_H
+#define TSAN_CLOCK_H
+
+#include "tsan_defs.h"
+#include "tsan_dense_alloc.h"
+
+namespace __tsan {
+
+typedef DenseSlabAlloc<ClockBlock, 1 << 22, 1 << 10> ClockAlloc;
+typedef DenseSlabAllocCache ClockCache;
+
+// The clock that lives in sync variables (mutexes, atomics, etc).
+class SyncClock {
+ public:
+  SyncClock();
+  ~SyncClock();
+
+  uptr size() const;
+
+  // These are used only in tests.
+  u64 get(unsigned tid) const;
+  u64 get_clean(unsigned tid) const;
+
+  void Resize(ClockCache *c, uptr nclk);
+  void Reset(ClockCache *c);
+
+  void DebugDump(int(*printf)(const char *s, ...));
+
+  // Clock element iterator.
+  // Note: it iterates only over the table without regard to dirty entries.
+  class Iter {
+   public:
+    explicit Iter(SyncClock* parent);
+    Iter& operator++();
+    bool operator!=(const Iter& other);
+    ClockElem &operator*();
+
+   private:
+    SyncClock *parent_;
+    // [pos_, end_) is the current continuous range of clock elements.
+    ClockElem *pos_;
+    ClockElem *end_;
+    int block_;  // Current number of second level block.
+
+    NOINLINE void Next();
+  };
+
+  Iter begin();
+  Iter end();
+
+ private:
+  friend class ThreadClock;
+  friend class Iter;
+  static const uptr kDirtyTids = 2;
+
+  struct Dirty {
+    u32 tid() const { return tid_ == kShortInvalidTid ? kInvalidTid : tid_; }
+    void set_tid(u32 tid) {
+      tid_ = tid == kInvalidTid ? kShortInvalidTid : tid;
+    }
+    u64 epoch : kClkBits;
+
+   private:
+    // Full kInvalidTid won't fit into Dirty::tid.
+    static const u64 kShortInvalidTid = (1ull << (64 - kClkBits)) - 1;
+    u64 tid_ : 64 - kClkBits;  // kInvalidId if not active
+  };
+
+  static_assert(sizeof(Dirty) == 8, "Dirty is not 64bit");
+
+  unsigned release_store_tid_;
+  unsigned release_store_reused_;
+  Dirty dirty_[kDirtyTids];
+  // If size_ is 0, tab_ is nullptr.
+  // If size <= 64 (kClockCount), tab_ contains pointer to an array with
+  // 64 ClockElem's (ClockBlock::clock).
+  // Otherwise, tab_ points to an array with up to 127 u32 elements,
+  // each pointing to the second-level 512b block with 64 ClockElem's.
+  // Unused space in the first level ClockBlock is used to store additional
+  // clock elements.
+  // The last u32 element in the first level ClockBlock is always used as
+  // reference counter.
+  //
+  // See the following scheme for details.
+  // All memory blocks are 512 bytes (allocated from ClockAlloc).
+  // Clock (clk) elements are 64 bits.
+  // Idx and ref are 32 bits.
+  //
+  // tab_
+  //    |
+  //    \/
+  //    +----------------------------------------------------+
+  //    | clk128 | clk129 | ...unused... | idx1 | idx0 | ref |
+  //    +----------------------------------------------------+
+  //                                        |      |
+  //                                        |      \/
+  //                                        |      +----------------+
+  //                                        |      | clk0 ... clk63 |
+  //                                        |      +----------------+
+  //                                        \/
+  //                                        +------------------+
+  //                                        | clk64 ... clk127 |
+  //                                        +------------------+
+  //
+  // Note: dirty entries, if active, always override what's stored in the clock.
+  ClockBlock *tab_;
+  u32 tab_idx_;
+  u16 size_;
+  u16 blocks_;  // Number of second level blocks.
+
+  void Unshare(ClockCache *c);
+  bool IsShared() const;
+  bool Cachable() const;
+  void ResetImpl();
+  void FlushDirty();
+  uptr capacity() const;
+  u32 get_block(uptr bi) const;
+  void append_block(u32 idx);
+  ClockElem &elem(unsigned tid) const;
+};
+
+// The clock that lives in threads.
+class ThreadClock {
+ public:
+  typedef DenseSlabAllocCache Cache;
+
+  explicit ThreadClock(unsigned tid, unsigned reused = 0);
+
+  u64 get(unsigned tid) const;
+  void set(ClockCache *c, unsigned tid, u64 v);
+  void set(u64 v);
+  void tick();
+  uptr size() const;
+
+  void acquire(ClockCache *c, SyncClock *src);
+  void releaseStoreAcquire(ClockCache *c, SyncClock *src);
+  void release(ClockCache *c, SyncClock *dst);
+  void acq_rel(ClockCache *c, SyncClock *dst);
+  void ReleaseStore(ClockCache *c, SyncClock *dst);
+  void ResetCached(ClockCache *c);
+  void NoteGlobalAcquire(u64 v);
+
+  void DebugReset();
+  void DebugDump(int(*printf)(const char *s, ...));
+
+ private:
+  static const uptr kDirtyTids = SyncClock::kDirtyTids;
+  // Index of the thread associated with he clock ("current thread").
+  const unsigned tid_;
+  const unsigned reused_;  // tid_ reuse count.
+  // Current thread time when it acquired something from other threads.
+  u64 last_acquire_;
+
+  // Last time another thread has done a global acquire of this thread's clock.
+  // It helps to avoid problem described in:
+  // https://github.com/golang/go/issues/39186
+  // See test/tsan/java_finalizer2.cpp for a regression test.
+  // Note the failuire is _extremely_ hard to hit, so if you are trying
+  // to reproduce it, you may want to run something like:
+  // $ go get golang.org/x/tools/cmd/stress
+  // $ stress -p=64 ./a.out
+  //
+  // The crux of the problem is roughly as follows.
+  // A number of O(1) optimizations in the clocks algorithm assume proper
+  // transitive cumulative propagation of clock values. The AcquireGlobal
+  // operation may produce an inconsistent non-linearazable view of
+  // thread clocks. Namely, it may acquire a later value from a thread
+  // with a higher ID, but fail to acquire an earlier value from a thread
+  // with a lower ID. If a thread that executed AcquireGlobal then releases
+  // to a sync clock, it will spoil the sync clock with the inconsistent
+  // values. If another thread later releases to the sync clock, the optimized
+  // algorithm may break.
+  //
+  // The exact sequence of events that leads to the failure.
+  // - thread 1 executes AcquireGlobal
+  // - thread 1 acquires value 1 for thread 2
+  // - thread 2 increments clock to 2
+  // - thread 2 releases to sync object 1
+  // - thread 3 at time 1
+  // - thread 3 acquires from sync object 1
+  // - thread 3 increments clock to 2
+  // - thread 1 acquires value 2 for thread 3
+  // - thread 1 releases to sync object 2
+  // - sync object 2 clock has 1 for thread 2 and 2 for thread 3
+  // - thread 3 releases to sync object 2
+  // - thread 3 sees value 2 in the clock for itself
+  //   and decides that it has already released to the clock
+  //   and did not acquire anything from other threads after that
+  //   (the last_acquire_ check in release operation)
+  // - thread 3 does not update the value for thread 2 in the clock from 1 to 2
+  // - thread 4 acquires from sync object 2
+  // - thread 4 detects a false race with thread 2
+  //   as it should have been synchronized with thread 2 up to time 2,
+  //   but because of the broken clock it is now synchronized only up to time 1
+  //
+  // The global_acquire_ value helps to prevent this scenario.
+  // Namely, thread 3 will not trust any own clock values up to global_acquire_
+  // for the purposes of the last_acquire_ optimization.
+  atomic_uint64_t global_acquire_;
+
+  // Cached SyncClock (without dirty entries and release_store_tid_).
+  // We reuse it for subsequent store-release operations without intervening
+  // acquire operations. Since it is shared (and thus constant), clock value
+  // for the current thread is then stored in dirty entries in the SyncClock.
+  // We host a reference to the table while it is cached here.
+  u32 cached_idx_;
+  u16 cached_size_;
+  u16 cached_blocks_;
+
+  // Number of active elements in the clk_ table (the rest is zeros).
+  uptr nclk_;
+  u64 clk_[kMaxTidInClock];  // Fixed size vector clock.
+
+  bool IsAlreadyAcquired(const SyncClock *src) const;
+  bool HasAcquiredAfterRelease(const SyncClock *dst) const;
+  void UpdateCurrentThread(ClockCache *c, SyncClock *dst) const;
+};
+
+ALWAYS_INLINE u64 ThreadClock::get(unsigned tid) const {
+  DCHECK_LT(tid, kMaxTidInClock);
+  return clk_[tid];
+}
+
+ALWAYS_INLINE void ThreadClock::set(u64 v) {
+  DCHECK_GE(v, clk_[tid_]);
+  clk_[tid_] = v;
+}
+
+ALWAYS_INLINE void ThreadClock::tick() {
+  clk_[tid_]++;
+}
+
+ALWAYS_INLINE uptr ThreadClock::size() const {
+  return nclk_;
+}
+
+ALWAYS_INLINE void ThreadClock::NoteGlobalAcquire(u64 v) {
+  // Here we rely on the fact that AcquireGlobal is protected by
+  // ThreadRegistryLock, thus only one thread at a time executes it
+  // and values passed to this function should not go backwards.
+  CHECK_LE(atomic_load_relaxed(&global_acquire_), v);
+  atomic_store_relaxed(&global_acquire_, v);
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::begin() {
+  return Iter(this);
+}
+
+ALWAYS_INLINE SyncClock::Iter SyncClock::end() {
+  return Iter(nullptr);
+}
+
+ALWAYS_INLINE uptr SyncClock::size() const {
+  return size_;
+}
+
+ALWAYS_INLINE SyncClock::Iter::Iter(SyncClock* parent)
+    : parent_(parent)
+    , pos_(nullptr)
+    , end_(nullptr)
+    , block_(-1) {
+  if (parent)
+    Next();
+}
+
+ALWAYS_INLINE SyncClock::Iter& SyncClock::Iter::operator++() {
+  pos_++;
+  if (UNLIKELY(pos_ >= end_))
+    Next();
+  return *this;
+}
+
+ALWAYS_INLINE bool SyncClock::Iter::operator!=(const SyncClock::Iter& other) {
+  return parent_ != other.parent_;
+}
+
+ALWAYS_INLINE ClockElem &SyncClock::Iter::operator*() {
+  return *pos_;
+}
+}  // namespace __tsan
+
+#endif  // TSAN_CLOCK_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_debugging.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_debugging.cpp
new file mode 100644
index 0000000000000..1d3c3849a4463
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_debugging.cpp
@@ -0,0 +1,262 @@
+//===-- tsan_debugging.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// TSan debugging API implementation.
+//===----------------------------------------------------------------------===//
+#include "tsan_interface.h"
+#include "tsan_report.h"
+#include "tsan_rtl.h"
+
+#include "sanitizer_common/sanitizer_stackdepot.h"
+
+using namespace __tsan;
+
+static const char *ReportTypeDescription(ReportType typ) {
+  switch (typ) {
+    case ReportTypeRace: return "data-race";
+    case ReportTypeVptrRace: return "data-race-vptr";
+    case ReportTypeUseAfterFree: return "heap-use-after-free";
+    case ReportTypeVptrUseAfterFree: return "heap-use-after-free-vptr";
+    case ReportTypeExternalRace: return "external-race";
+    case ReportTypeThreadLeak: return "thread-leak";
+    case ReportTypeMutexDestroyLocked: return "locked-mutex-destroy";
+    case ReportTypeMutexDoubleLock: return "mutex-double-lock";
+    case ReportTypeMutexInvalidAccess: return "mutex-invalid-access";
+    case ReportTypeMutexBadUnlock: return "mutex-bad-unlock";
+    case ReportTypeMutexBadReadLock: return "mutex-bad-read-lock";
+    case ReportTypeMutexBadReadUnlock: return "mutex-bad-read-unlock";
+    case ReportTypeSignalUnsafe: return "signal-unsafe-call";
+    case ReportTypeErrnoInSignal: return "errno-in-signal-handler";
+    case ReportTypeDeadlock: return "lock-order-inversion";
+    // No default case so compiler warns us if we miss one
+  }
+  UNREACHABLE("missing case");
+}
+
+static const char *ReportLocationTypeDescription(ReportLocationType typ) {
+  switch (typ) {
+    case ReportLocationGlobal: return "global";
+    case ReportLocationHeap: return "heap";
+    case ReportLocationStack: return "stack";
+    case ReportLocationTLS: return "tls";
+    case ReportLocationFD: return "fd";
+    // No default case so compiler warns us if we miss one
+  }
+  UNREACHABLE("missing case");
+}
+
+static void CopyTrace(SymbolizedStack *first_frame, void **trace,
+                      uptr trace_size) {
+  uptr i = 0;
+  for (SymbolizedStack *frame = first_frame; frame != nullptr;
+       frame = frame->next) {
+    trace[i++] = (void *)frame->info.address;
+    if (i >= trace_size) break;
+  }
+}
+
+// Meant to be called by the debugger.
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_get_current_report() {
+  return const_cast<ReportDesc*>(cur_thread()->current_report);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_data(void *report, const char **description, int *count,
+                           int *stack_count, int *mop_count, int *loc_count,
+                           int *mutex_count, int *thread_count,
+                           int *unique_tid_count, void **sleep_trace,
+                           uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  *description = ReportTypeDescription(rep->typ);
+  *count = rep->count;
+  *stack_count = rep->stacks.Size();
+  *mop_count = rep->mops.Size();
+  *loc_count = rep->locs.Size();
+  *mutex_count = rep->mutexes.Size();
+  *thread_count = rep->threads.Size();
+  *unique_tid_count = rep->unique_tids.Size();
+  if (rep->sleep) CopyTrace(rep->sleep->frames, sleep_trace, trace_size);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_tag(void *report, uptr *tag) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  *tag = rep->tag;
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_stack(void *report, uptr idx, void **trace,
+                            uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->stacks.Size());
+  ReportStack *stack = rep->stacks[idx];
+  if (stack) CopyTrace(stack->frames, trace, trace_size);
+  return stack ? 1 : 0;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_mop(void *report, uptr idx, int *tid, void **addr,
+                          int *size, int *write, int *atomic, void **trace,
+                          uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->mops.Size());
+  ReportMop *mop = rep->mops[idx];
+  *tid = mop->tid;
+  *addr = (void *)mop->addr;
+  *size = mop->size;
+  *write = mop->write ? 1 : 0;
+  *atomic = mop->atomic ? 1 : 0;
+  if (mop->stack) CopyTrace(mop->stack->frames, trace, trace_size);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc(void *report, uptr idx, const char **type,
+                          void **addr, uptr *start, uptr *size, int *tid,
+                          int *fd, int *suppressable, void **trace,
+                          uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->locs.Size());
+  ReportLocation *loc = rep->locs[idx];
+  *type = ReportLocationTypeDescription(loc->type);
+  *addr = (void *)loc->global.start;
+  *start = loc->heap_chunk_start;
+  *size = loc->heap_chunk_size;
+  *tid = loc->tid;
+  *fd = loc->fd;
+  *suppressable = loc->suppressable;
+  if (loc->stack) CopyTrace(loc->stack->frames, trace, trace_size);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc_object_type(void *report, uptr idx,
+                                      const char **object_type) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->locs.Size());
+  ReportLocation *loc = rep->locs[idx];
+  *object_type = GetObjectTypeFromTag(loc->external_tag);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
+                            int *destroyed, void **trace, uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->mutexes.Size());
+  ReportMutex *mutex = rep->mutexes[idx];
+  *mutex_id = mutex->id;
+  *addr = (void *)mutex->addr;
+  *destroyed = mutex->destroyed;
+  if (mutex->stack) CopyTrace(mutex->stack->frames, trace, trace_size);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
+                             int *running, const char **name, int *parent_tid,
+                             void **trace, uptr trace_size) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->threads.Size());
+  ReportThread *thread = rep->threads[idx];
+  *tid = thread->id;
+  *os_id = thread->os_id;
+  *running = thread->running;
+  *name = thread->name;
+  *parent_tid = thread->parent_tid;
+  if (thread->stack) CopyTrace(thread->stack->frames, trace, trace_size);
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_unique_tid(void *report, uptr idx, int *tid) {
+  const ReportDesc *rep = (ReportDesc *)report;
+  CHECK_LT(idx, rep->unique_tids.Size());
+  *tid = rep->unique_tids[idx];
+  return 1;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address_ptr,
+                                  uptr *region_size_ptr) {
+  uptr region_address = 0;
+  uptr region_size = 0;
+  const char *region_kind = nullptr;
+  if (name && name_size > 0) name[0] = 0;
+
+  if (IsMetaMem(reinterpret_cast<u32 *>(addr))) {
+    region_kind = "meta shadow";
+  } else if (IsShadowMem(reinterpret_cast<RawShadow *>(addr))) {
+    region_kind = "shadow";
+  } else {
+    bool is_stack = false;
+    MBlock *b = 0;
+    Allocator *a = allocator();
+    if (a->PointerIsMine((void *)addr)) {
+      void *block_begin = a->GetBlockBegin((void *)addr);
+      if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+    }
+
+    if (b != 0) {
+      region_address = (uptr)allocator()->GetBlockBegin((void *)addr);
+      region_size = b->siz;
+      region_kind = "heap";
+    } else {
+      // TODO(kuba.brecka): We should not lock. This is supposed to be called
+      // from within the debugger when other threads are stopped.
+      ctx->thread_registry.Lock();
+      ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack);
+      ctx->thread_registry.Unlock();
+      if (tctx) {
+        region_kind = is_stack ? "stack" : "tls";
+      } else {
+        region_kind = "global";
+        DataInfo info;
+        if (Symbolizer::GetOrInit()->SymbolizeData(addr, &info)) {
+          internal_strncpy(name, info.name, name_size);
+          region_address = info.start;
+          region_size = info.size;
+        }
+      }
+    }
+  }
+
+  CHECK(region_kind);
+  if (region_address_ptr) *region_address_ptr = region_address;
+  if (region_size_ptr) *region_size_ptr = region_size;
+  return region_kind;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           tid_t *os_id) {
+  MBlock *b = 0;
+  Allocator *a = allocator();
+  if (a->PointerIsMine((void *)addr)) {
+    void *block_begin = a->GetBlockBegin((void *)addr);
+    if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+  }
+  if (b == 0) return 0;
+
+  *thread_id = b->tid;
+  // No locking.  This is supposed to be called from within the debugger when
+  // other threads are stopped.
+  ThreadContextBase *tctx = ctx->thread_registry.GetThreadLocked(b->tid);
+  *os_id = tctx->os_id;
+
+  StackTrace stack = StackDepotGet(b->stk);
+  size = Min(size, (uptr)stack.size);
+  for (uptr i = 0; i < size; i++) trace[i] = stack.trace[stack.size - i - 1];
+  return size;
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_defs.h b/compiler-rt/lib/tsan/rtl-old/tsan_defs.h
new file mode 100644
index 0000000000000..4712c2be1813e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_defs.h
@@ -0,0 +1,236 @@
+//===-- tsan_defs.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_DEFS_H
+#define TSAN_DEFS_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "ubsan/ubsan_platform.h"
+
+#ifndef TSAN_VECTORIZE
+#  define TSAN_VECTORIZE __SSE4_2__
+#endif
+
+#if TSAN_VECTORIZE
+// <emmintrin.h> transitively includes <stdlib.h>,
+// and it's prohibited to include std headers into tsan runtime.
+// So we do this dirty trick.
+#  define _MM_MALLOC_H_INCLUDED
+#  define __MM_MALLOC_H
+#  include <emmintrin.h>
+#  include <smmintrin.h>
+#  define VECTOR_ALIGNED ALIGNED(16)
+typedef __m128i m128;
+#else
+#  define VECTOR_ALIGNED
+#endif
+
+// Setup defaults for compile definitions.
+#ifndef TSAN_NO_HISTORY
+# define TSAN_NO_HISTORY 0
+#endif
+
+#ifndef TSAN_CONTAINS_UBSAN
+# if CAN_SANITIZE_UB && !SANITIZER_GO
+#  define TSAN_CONTAINS_UBSAN 1
+# else
+#  define TSAN_CONTAINS_UBSAN 0
+# endif
+#endif
+
+namespace __tsan {
+
+constexpr uptr kByteBits = 8;
+
+// Thread slot ID.
+enum class Sid : u8 {};
+constexpr uptr kThreadSlotCount = 256;
+constexpr Sid kFreeSid = static_cast<Sid>(255);
+
+// Abstract time unit, vector clock element.
+enum class Epoch : u16 {};
+constexpr uptr kEpochBits = 14;
+constexpr Epoch kEpochZero = static_cast<Epoch>(0);
+constexpr Epoch kEpochOver = static_cast<Epoch>(1 << kEpochBits);
+
+const int kClkBits = 42;
+const unsigned kMaxTidReuse = (1 << (64 - kClkBits)) - 1;
+
+struct ClockElem {
+  u64 epoch  : kClkBits;
+  u64 reused : 64 - kClkBits;  // tid reuse count
+};
+
+struct ClockBlock {
+  static const uptr kSize = 512;
+  static const uptr kTableSize = kSize / sizeof(u32);
+  static const uptr kClockCount = kSize / sizeof(ClockElem);
+  static const uptr kRefIdx = kTableSize - 1;
+  static const uptr kBlockIdx = kTableSize - 2;
+
+  union {
+    u32       table[kTableSize];
+    ClockElem clock[kClockCount];
+  };
+
+  ClockBlock() {
+  }
+};
+
+const int kTidBits = 13;
+// Reduce kMaxTid by kClockCount because one slot in ClockBlock table is
+// occupied by reference counter, so total number of elements we can store
+// in SyncClock is kClockCount * (kTableSize - 1).
+const unsigned kMaxTid = (1 << kTidBits) - ClockBlock::kClockCount;
+#if !SANITIZER_GO
+const unsigned kMaxTidInClock = kMaxTid * 2;  // This includes msb 'freed' bit.
+#else
+const unsigned kMaxTidInClock = kMaxTid;  // Go does not track freed memory.
+#endif
+const uptr kShadowStackSize = 64 * 1024;
+
+// Count of shadow values in a shadow cell.
+const uptr kShadowCnt = 4;
+
+// That many user bytes are mapped onto a single shadow cell.
+const uptr kShadowCell = 8;
+
+// Single shadow value.
+typedef u64 RawShadow;
+const uptr kShadowSize = sizeof(RawShadow);
+
+// Shadow memory is kShadowMultiplier times larger than user memory.
+const uptr kShadowMultiplier = kShadowSize * kShadowCnt / kShadowCell;
+
+// That many user bytes are mapped onto a single meta shadow cell.
+// Must be less or equal to minimal memory allocator alignment.
+const uptr kMetaShadowCell = 8;
+
+// Size of a single meta shadow value (u32).
+const uptr kMetaShadowSize = 4;
+
+// All addresses and PCs are assumed to be compressable to that many bits.
+const uptr kCompressedAddrBits = 44;
+
+#if TSAN_NO_HISTORY
+const bool kCollectHistory = false;
+#else
+const bool kCollectHistory = true;
+#endif
+
+// The following "build consistency" machinery ensures that all source files
+// are built in the same configuration. Inconsistent builds lead to
+// hard to debug crashes.
+#if SANITIZER_DEBUG
+void build_consistency_debug();
+#else
+void build_consistency_release();
+#endif
+
+static inline void USED build_consistency() {
+#if SANITIZER_DEBUG
+  build_consistency_debug();
+#else
+  build_consistency_release();
+#endif
+}
+
+template<typename T>
+T min(T a, T b) {
+  return a < b ? a : b;
+}
+
+template<typename T>
+T max(T a, T b) {
+  return a > b ? a : b;
+}
+
+template<typename T>
+T RoundUp(T p, u64 align) {
+  DCHECK_EQ(align & (align - 1), 0);
+  return (T)(((u64)p + align - 1) & ~(align - 1));
+}
+
+template<typename T>
+T RoundDown(T p, u64 align) {
+  DCHECK_EQ(align & (align - 1), 0);
+  return (T)((u64)p & ~(align - 1));
+}
+
+// Zeroizes high part, returns 'bits' lsb bits.
+template<typename T>
+T GetLsb(T v, int bits) {
+  return (T)((u64)v & ((1ull << bits) - 1));
+}
+
+struct MD5Hash {
+  u64 hash[2];
+  bool operator==(const MD5Hash &other) const;
+};
+
+MD5Hash md5_hash(const void *data, uptr size);
+
+struct Processor;
+struct ThreadState;
+class ThreadContext;
+struct Context;
+struct ReportStack;
+class ReportDesc;
+class RegionAlloc;
+
+typedef uptr AccessType;
+
+enum : AccessType {
+  kAccessWrite = 0,
+  kAccessRead = 1 << 0,
+  kAccessAtomic = 1 << 1,
+  kAccessVptr = 1 << 2,  // read or write of an object virtual table pointer
+  kAccessFree = 1 << 3,  // synthetic memory access during memory freeing
+  kAccessExternalPC = 1 << 4,  // access PC can have kExternalPCBit set
+};
+
+// Descriptor of user's memory block.
+struct MBlock {
+  u64  siz : 48;
+  u64  tag : 16;
+  StackID stk;
+  Tid tid;
+};
+
+COMPILER_CHECK(sizeof(MBlock) == 16);
+
+enum ExternalTag : uptr {
+  kExternalTagNone = 0,
+  kExternalTagSwiftModifyingAccess = 1,
+  kExternalTagFirstUserAvailable = 2,
+  kExternalTagMax = 1024,
+  // Don't set kExternalTagMax over 65,536, since MBlock only stores tags
+  // as 16-bit values, see tsan_defs.h.
+};
+
+enum MutexType {
+  MutexTypeTrace = MutexLastCommon,
+  MutexTypeReport,
+  MutexTypeSyncVar,
+  MutexTypeAnnotations,
+  MutexTypeAtExit,
+  MutexTypeFired,
+  MutexTypeRacy,
+  MutexTypeGlobalProc,
+  MutexTypeInternalAlloc,
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_DEFS_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_dense_alloc.h b/compiler-rt/lib/tsan/rtl-old/tsan_dense_alloc.h
new file mode 100644
index 0000000000000..9e15f74a06152
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_dense_alloc.h
@@ -0,0 +1,156 @@
+//===-- tsan_dense_alloc.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// A DenseSlabAlloc is a freelist-based allocator of fixed-size objects.
+// DenseSlabAllocCache is a thread-local cache for DenseSlabAlloc.
+// The only 
diff erence with traditional slab allocators is that DenseSlabAlloc
+// allocates/free indices of objects and provide a functionality to map
+// the index onto the real pointer. The index is u32, that is, 2 times smaller
+// than uptr (hense the Dense prefix).
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_DENSE_ALLOC_H
+#define TSAN_DENSE_ALLOC_H
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+class DenseSlabAllocCache {
+  static const uptr kSize = 128;
+  typedef u32 IndexT;
+  uptr pos;
+  IndexT cache[kSize];
+  template <typename, uptr, uptr, u64>
+  friend class DenseSlabAlloc;
+};
+
+template <typename T, uptr kL1Size, uptr kL2Size, u64 kReserved = 0>
+class DenseSlabAlloc {
+ public:
+  typedef DenseSlabAllocCache Cache;
+  typedef typename Cache::IndexT IndexT;
+
+  static_assert((kL1Size & (kL1Size - 1)) == 0,
+                "kL1Size must be a power-of-two");
+  static_assert((kL2Size & (kL2Size - 1)) == 0,
+                "kL2Size must be a power-of-two");
+  static_assert((kL1Size * kL2Size) <= (1ull << (sizeof(IndexT) * 8)),
+                "kL1Size/kL2Size are too large");
+  static_assert(((kL1Size * kL2Size - 1) & kReserved) == 0,
+                "reserved bits don't fit");
+  static_assert(sizeof(T) > sizeof(IndexT),
+                "it doesn't make sense to use dense alloc");
+
+  DenseSlabAlloc(LinkerInitialized, const char *name) : name_(name) {}
+
+  explicit DenseSlabAlloc(const char *name)
+      : DenseSlabAlloc(LINKER_INITIALIZED, name) {
+    // It can be very large.
+    // Don't page it in for linker initialized objects.
+    internal_memset(map_, 0, sizeof(map_));
+  }
+
+  ~DenseSlabAlloc() {
+    for (uptr i = 0; i < kL1Size; i++) {
+      if (map_[i] != 0)
+        UnmapOrDie(map_[i], kL2Size * sizeof(T));
+    }
+  }
+
+  IndexT Alloc(Cache *c) {
+    if (c->pos == 0)
+      Refill(c);
+    return c->cache[--c->pos];
+  }
+
+  void Free(Cache *c, IndexT idx) {
+    DCHECK_NE(idx, 0);
+    if (c->pos == Cache::kSize)
+      Drain(c);
+    c->cache[c->pos++] = idx;
+  }
+
+  T *Map(IndexT idx) {
+    DCHECK_NE(idx, 0);
+    DCHECK_LE(idx, kL1Size * kL2Size);
+    return &map_[idx / kL2Size][idx % kL2Size];
+  }
+
+  void FlushCache(Cache *c) {
+    if (!c->pos)
+      return;
+    SpinMutexLock lock(&mtx_);
+    while (c->pos) {
+      IndexT idx = c->cache[--c->pos];
+      *(IndexT*)Map(idx) = freelist_;
+      freelist_ = idx;
+    }
+  }
+
+  void InitCache(Cache *c) {
+    c->pos = 0;
+    internal_memset(c->cache, 0, sizeof(c->cache));
+  }
+
+  uptr AllocatedMemory() const {
+    return atomic_load_relaxed(&fillpos_) * kL2Size * sizeof(T);
+  }
+
+ private:
+  T *map_[kL1Size];
+  SpinMutex mtx_;
+  IndexT freelist_ = {0};
+  atomic_uintptr_t fillpos_ = {0};
+  const char *const name_;
+
+  void Refill(Cache *c) {
+    SpinMutexLock lock(&mtx_);
+    if (freelist_ == 0) {
+      uptr fillpos = atomic_load_relaxed(&fillpos_);
+      if (fillpos == kL1Size) {
+        Printf("ThreadSanitizer: %s overflow (%zu*%zu). Dying.\n",
+            name_, kL1Size, kL2Size);
+        Die();
+      }
+      VPrintf(2, "ThreadSanitizer: growing %s: %zu out of %zu*%zu\n", name_,
+              fillpos, kL1Size, kL2Size);
+      T *batch = (T*)MmapOrDie(kL2Size * sizeof(T), name_);
+      // Reserve 0 as invalid index.
+      IndexT start = fillpos == 0 ? 1 : 0;
+      for (IndexT i = start; i < kL2Size; i++) {
+        new(batch + i) T;
+        *(IndexT *)(batch + i) = i + 1 + fillpos * kL2Size;
+      }
+      *(IndexT*)(batch + kL2Size - 1) = 0;
+      freelist_ = fillpos * kL2Size + start;
+      map_[fillpos] = batch;
+      atomic_store_relaxed(&fillpos_, fillpos + 1);
+    }
+    for (uptr i = 0; i < Cache::kSize / 2 && freelist_ != 0; i++) {
+      IndexT idx = freelist_;
+      c->cache[c->pos++] = idx;
+      freelist_ = *(IndexT*)Map(idx);
+    }
+  }
+
+  void Drain(Cache *c) {
+    SpinMutexLock lock(&mtx_);
+    for (uptr i = 0; i < Cache::kSize / 2; i++) {
+      IndexT idx = c->cache[--c->pos];
+      *(IndexT*)Map(idx) = freelist_;
+      freelist_ = idx;
+    }
+  }
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_DENSE_ALLOC_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h b/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h
new file mode 100644
index 0000000000000..94e0b50fed360
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_dispatch_defs.h
@@ -0,0 +1,73 @@
+//===-- tsan_dispatch_defs.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_DISPATCH_DEFS_H
+#define TSAN_DISPATCH_DEFS_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+typedef struct dispatch_object_s {} *dispatch_object_t;
+
+#define DISPATCH_DECL(name) \
+  typedef struct name##_s : public dispatch_object_s {} *name##_t
+
+DISPATCH_DECL(dispatch_queue);
+DISPATCH_DECL(dispatch_source);
+DISPATCH_DECL(dispatch_group);
+DISPATCH_DECL(dispatch_data);
+DISPATCH_DECL(dispatch_semaphore);
+DISPATCH_DECL(dispatch_io);
+
+typedef void (*dispatch_function_t)(void *arg);
+typedef void (^dispatch_block_t)(void);
+typedef void (^dispatch_io_handler_t)(bool done, dispatch_data_t data,
+                                      int error);
+
+typedef long dispatch_once_t;
+typedef __sanitizer::u64 dispatch_time_t;
+typedef int dispatch_fd_t;
+typedef unsigned long dispatch_io_type_t;
+typedef unsigned long dispatch_io_close_flags_t;
+
+extern "C" {
+void *dispatch_get_context(dispatch_object_t object);
+void dispatch_retain(dispatch_object_t object);
+void dispatch_release(dispatch_object_t object);
+
+extern const dispatch_block_t _dispatch_data_destructor_free;
+extern const dispatch_block_t _dispatch_data_destructor_munmap;
+} // extern "C"
+
+#define DISPATCH_DATA_DESTRUCTOR_DEFAULT nullptr
+#define DISPATCH_DATA_DESTRUCTOR_FREE    _dispatch_data_destructor_free
+#define DISPATCH_DATA_DESTRUCTOR_MUNMAP  _dispatch_data_destructor_munmap
+
+#if __has_attribute(noescape)
+# define DISPATCH_NOESCAPE __attribute__((__noescape__))
+#else
+# define DISPATCH_NOESCAPE
+#endif
+
+#if SANITIZER_MAC
+# define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak_import))
+#else
+# define SANITIZER_WEAK_IMPORT extern "C" __attribute((weak))
+#endif
+
+
+// Data types used in dispatch APIs
+typedef unsigned long size_t;
+typedef unsigned long uintptr_t;
+typedef __sanitizer::s64 off_t;
+typedef __sanitizer::u16 mode_t;
+typedef long long_t;
+
+#endif  // TSAN_DISPATCH_DEFS_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_external.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_external.cpp
new file mode 100644
index 0000000000000..19ae174f20a59
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_external.cpp
@@ -0,0 +1,126 @@
+//===-- tsan_external.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+
+#if !SANITIZER_GO
+#  include "tsan_interceptors.h"
+#endif
+
+namespace __tsan {
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+struct TagData {
+  const char *object_type;
+  const char *header;
+};
+
+static TagData registered_tags[kExternalTagMax] = {
+  {},
+  {"Swift variable", "Swift access race"},
+};
+static atomic_uint32_t used_tags{kExternalTagFirstUserAvailable};
+static TagData *GetTagData(uptr tag) {
+  // Invalid/corrupted tag?  Better return NULL and let the caller deal with it.
+  if (tag >= atomic_load(&used_tags, memory_order_relaxed)) return nullptr;
+  return &registered_tags[tag];
+}
+
+const char *GetObjectTypeFromTag(uptr tag) {
+  TagData *tag_data = GetTagData(tag);
+  return tag_data ? tag_data->object_type : nullptr;
+}
+
+const char *GetReportHeaderFromTag(uptr tag) {
+  TagData *tag_data = GetTagData(tag);
+  return tag_data ? tag_data->header : nullptr;
+}
+
+void InsertShadowStackFrameForTag(ThreadState *thr, uptr tag) {
+  FuncEntry(thr, (uptr)&registered_tags[tag]);
+}
+
+uptr TagFromShadowStackFrame(uptr pc) {
+  uptr tag_count = atomic_load(&used_tags, memory_order_relaxed);
+  void *pc_ptr = (void *)pc;
+  if (pc_ptr < GetTagData(0) || pc_ptr > GetTagData(tag_count - 1))
+    return 0;
+  return (TagData *)pc_ptr - GetTagData(0);
+}
+
+#if !SANITIZER_GO
+
+void ExternalAccess(void *addr, uptr caller_pc, void *tag, AccessType typ) {
+  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
+  ThreadState *thr = cur_thread();
+  if (caller_pc) FuncEntry(thr, caller_pc);
+  InsertShadowStackFrameForTag(thr, (uptr)tag);
+  bool in_ignored_lib;
+  if (!caller_pc || !libignore()->IsIgnored(caller_pc, &in_ignored_lib))
+    MemoryAccess(thr, CALLERPC, (uptr)addr, 1, typ);
+  FuncExit(thr);
+  if (caller_pc) FuncExit(thr);
+}
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_external_register_tag(const char *object_type) {
+  uptr new_tag = atomic_fetch_add(&used_tags, 1, memory_order_relaxed);
+  CHECK_LT(new_tag, kExternalTagMax);
+  GetTagData(new_tag)->object_type = internal_strdup(object_type);
+  char header[127] = {0};
+  internal_snprintf(header, sizeof(header), "race on %s", object_type);
+  GetTagData(new_tag)->header = internal_strdup(header);
+  return (void *)new_tag;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_register_header(void *tag, const char *header) {
+  CHECK_GE((uptr)tag, kExternalTagFirstUserAvailable);
+  CHECK_LT((uptr)tag, kExternalTagMax);
+  atomic_uintptr_t *header_ptr =
+      (atomic_uintptr_t *)&GetTagData((uptr)tag)->header;
+  header = internal_strdup(header);
+  char *old_header =
+      (char *)atomic_exchange(header_ptr, (uptr)header, memory_order_seq_cst);
+  Free(old_header);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_assign_tag(void *addr, void *tag) {
+  CHECK_LT(tag, atomic_load(&used_tags, memory_order_relaxed));
+  Allocator *a = allocator();
+  MBlock *b = nullptr;
+  if (a->PointerIsMine((void *)addr)) {
+    void *block_begin = a->GetBlockBegin((void *)addr);
+    if (block_begin) b = ctx->metamap.GetBlock((uptr)block_begin);
+  }
+  if (b) {
+    b->tag = (uptr)tag;
+  }
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_read(void *addr, void *caller_pc, void *tag) {
+  ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, kAccessRead);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_write(void *addr, void *caller_pc, void *tag) {
+  ExternalAccess(addr, STRIP_PAC_PC(caller_pc), tag, kAccessWrite);
+}
+}  // extern "C"
+
+#endif  // !SANITIZER_GO
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_fd.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_fd.cpp
new file mode 100644
index 0000000000000..255ffa8daf760
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_fd.cpp
@@ -0,0 +1,316 @@
+//===-- tsan_fd.cpp -------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_fd.h"
+#include "tsan_rtl.h"
+#include <sanitizer_common/sanitizer_atomic.h>
+
+namespace __tsan {
+
+const int kTableSizeL1 = 1024;
+const int kTableSizeL2 = 1024;
+const int kTableSize = kTableSizeL1 * kTableSizeL2;
+
+struct FdSync {
+  atomic_uint64_t rc;
+};
+
+struct FdDesc {
+  FdSync *sync;
+  Tid creation_tid;
+  StackID creation_stack;
+};
+
+struct FdContext {
+  atomic_uintptr_t tab[kTableSizeL1];
+  // Addresses used for synchronization.
+  FdSync globsync;
+  FdSync filesync;
+  FdSync socksync;
+  u64 connectsync;
+};
+
+static FdContext fdctx;
+
+static bool bogusfd(int fd) {
+  // Apparently a bogus fd value.
+  return fd < 0 || fd >= kTableSize;
+}
+
+static FdSync *allocsync(ThreadState *thr, uptr pc) {
+  FdSync *s = (FdSync*)user_alloc_internal(thr, pc, sizeof(FdSync),
+      kDefaultAlignment, false);
+  atomic_store(&s->rc, 1, memory_order_relaxed);
+  return s;
+}
+
+static FdSync *ref(FdSync *s) {
+  if (s && atomic_load(&s->rc, memory_order_relaxed) != (u64)-1)
+    atomic_fetch_add(&s->rc, 1, memory_order_relaxed);
+  return s;
+}
+
+static void unref(ThreadState *thr, uptr pc, FdSync *s) {
+  if (s && atomic_load(&s->rc, memory_order_relaxed) != (u64)-1) {
+    if (atomic_fetch_sub(&s->rc, 1, memory_order_acq_rel) == 1) {
+      CHECK_NE(s, &fdctx.globsync);
+      CHECK_NE(s, &fdctx.filesync);
+      CHECK_NE(s, &fdctx.socksync);
+      user_free(thr, pc, s, false);
+    }
+  }
+}
+
+static FdDesc *fddesc(ThreadState *thr, uptr pc, int fd) {
+  CHECK_GE(fd, 0);
+  CHECK_LT(fd, kTableSize);
+  atomic_uintptr_t *pl1 = &fdctx.tab[fd / kTableSizeL2];
+  uptr l1 = atomic_load(pl1, memory_order_consume);
+  if (l1 == 0) {
+    uptr size = kTableSizeL2 * sizeof(FdDesc);
+    // We need this to reside in user memory to properly catch races on it.
+    void *p = user_alloc_internal(thr, pc, size, kDefaultAlignment, false);
+    internal_memset(p, 0, size);
+    MemoryResetRange(thr, (uptr)&fddesc, (uptr)p, size);
+    if (atomic_compare_exchange_strong(pl1, &l1, (uptr)p, memory_order_acq_rel))
+      l1 = (uptr)p;
+    else
+      user_free(thr, pc, p, false);
+  }
+  FdDesc *fds = reinterpret_cast<FdDesc *>(l1);
+  return &fds[fd % kTableSizeL2];
+}
+
+// pd must be already ref'ed.
+static void init(ThreadState *thr, uptr pc, int fd, FdSync *s,
+    bool write = true) {
+  FdDesc *d = fddesc(thr, pc, fd);
+  // As a matter of fact, we don't intercept all close calls.
+  // See e.g. libc __res_iclose().
+  if (d->sync) {
+    unref(thr, pc, d->sync);
+    d->sync = 0;
+  }
+  if (flags()->io_sync == 0) {
+    unref(thr, pc, s);
+  } else if (flags()->io_sync == 1) {
+    d->sync = s;
+  } else if (flags()->io_sync == 2) {
+    unref(thr, pc, s);
+    d->sync = &fdctx.globsync;
+  }
+  d->creation_tid = thr->tid;
+  d->creation_stack = CurrentStackId(thr, pc);
+  if (write) {
+    // To catch races between fd usage and open.
+    MemoryRangeImitateWrite(thr, pc, (uptr)d, 8);
+  } else {
+    // See the dup-related comment in FdClose.
+    MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+  }
+}
+
+void FdInit() {
+  atomic_store(&fdctx.globsync.rc, (u64)-1, memory_order_relaxed);
+  atomic_store(&fdctx.filesync.rc, (u64)-1, memory_order_relaxed);
+  atomic_store(&fdctx.socksync.rc, (u64)-1, memory_order_relaxed);
+}
+
+void FdOnFork(ThreadState *thr, uptr pc) {
+  // On fork() we need to reset all fd's, because the child is going
+  // close all them, and that will cause races between previous read/write
+  // and the close.
+  for (int l1 = 0; l1 < kTableSizeL1; l1++) {
+    FdDesc *tab = (FdDesc*)atomic_load(&fdctx.tab[l1], memory_order_relaxed);
+    if (tab == 0)
+      break;
+    for (int l2 = 0; l2 < kTableSizeL2; l2++) {
+      FdDesc *d = &tab[l2];
+      MemoryResetRange(thr, pc, (uptr)d, 8);
+    }
+  }
+}
+
+bool FdLocation(uptr addr, int *fd, Tid *tid, StackID *stack) {
+  for (int l1 = 0; l1 < kTableSizeL1; l1++) {
+    FdDesc *tab = (FdDesc*)atomic_load(&fdctx.tab[l1], memory_order_relaxed);
+    if (tab == 0)
+      break;
+    if (addr >= (uptr)tab && addr < (uptr)(tab + kTableSizeL2)) {
+      int l2 = (addr - (uptr)tab) / sizeof(FdDesc);
+      FdDesc *d = &tab[l2];
+      *fd = l1 * kTableSizeL1 + l2;
+      *tid = d->creation_tid;
+      *stack = d->creation_stack;
+      return true;
+    }
+  }
+  return false;
+}
+
+void FdAcquire(ThreadState *thr, uptr pc, int fd) {
+  if (bogusfd(fd))
+    return;
+  FdDesc *d = fddesc(thr, pc, fd);
+  FdSync *s = d->sync;
+  DPrintf("#%d: FdAcquire(%d) -> %p\n", thr->tid, fd, s);
+  MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+  if (s)
+    Acquire(thr, pc, (uptr)s);
+}
+
+void FdRelease(ThreadState *thr, uptr pc, int fd) {
+  if (bogusfd(fd))
+    return;
+  FdDesc *d = fddesc(thr, pc, fd);
+  FdSync *s = d->sync;
+  DPrintf("#%d: FdRelease(%d) -> %p\n", thr->tid, fd, s);
+  MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+  if (s)
+    Release(thr, pc, (uptr)s);
+}
+
+void FdAccess(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdAccess(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  FdDesc *d = fddesc(thr, pc, fd);
+  MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+}
+
+void FdClose(ThreadState *thr, uptr pc, int fd, bool write) {
+  DPrintf("#%d: FdClose(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  FdDesc *d = fddesc(thr, pc, fd);
+  if (write) {
+    // To catch races between fd usage and close.
+    MemoryAccess(thr, pc, (uptr)d, 8, kAccessWrite);
+  } else {
+    // This path is used only by dup2/dup3 calls.
+    // We do read instead of write because there is a number of legitimate
+    // cases where write would lead to false positives:
+    // 1. Some software dups a closed pipe in place of a socket before closing
+    //    the socket (to prevent races actually).
+    // 2. Some daemons dup /dev/null in place of stdin/stdout.
+    // On the other hand we have not seen cases when write here catches real
+    // bugs.
+    MemoryAccess(thr, pc, (uptr)d, 8, kAccessRead);
+  }
+  // We need to clear it, because if we do not intercept any call out there
+  // that creates fd, we will hit false postives.
+  MemoryResetRange(thr, pc, (uptr)d, 8);
+  unref(thr, pc, d->sync);
+  d->sync = 0;
+  d->creation_tid = kInvalidTid;
+  d->creation_stack = kInvalidStackID;
+}
+
+void FdFileCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdFileCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, &fdctx.filesync);
+}
+
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd, bool write) {
+  DPrintf("#%d: FdDup(%d, %d)\n", thr->tid, oldfd, newfd);
+  if (bogusfd(oldfd) || bogusfd(newfd))
+    return;
+  // Ignore the case when user dups not yet connected socket.
+  FdDesc *od = fddesc(thr, pc, oldfd);
+  MemoryAccess(thr, pc, (uptr)od, 8, kAccessRead);
+  FdClose(thr, pc, newfd, write);
+  init(thr, pc, newfd, ref(od->sync), write);
+}
+
+void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd) {
+  DPrintf("#%d: FdCreatePipe(%d, %d)\n", thr->tid, rfd, wfd);
+  FdSync *s = allocsync(thr, pc);
+  init(thr, pc, rfd, ref(s));
+  init(thr, pc, wfd, ref(s));
+  unref(thr, pc, s);
+}
+
+void FdEventCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdEventCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, allocsync(thr, pc));
+}
+
+void FdSignalCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSignalCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, 0);
+}
+
+void FdInotifyCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdInotifyCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, 0);
+}
+
+void FdPollCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdPollCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, allocsync(thr, pc));
+}
+
+void FdSocketCreate(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketCreate(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  // It can be a UDP socket.
+  init(thr, pc, fd, &fdctx.socksync);
+}
+
+void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd) {
+  DPrintf("#%d: FdSocketAccept(%d, %d)\n", thr->tid, fd, newfd);
+  if (bogusfd(fd))
+    return;
+  // Synchronize connect->accept.
+  Acquire(thr, pc, (uptr)&fdctx.connectsync);
+  init(thr, pc, newfd, &fdctx.socksync);
+}
+
+void FdSocketConnecting(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketConnecting(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  // Synchronize connect->accept.
+  Release(thr, pc, (uptr)&fdctx.connectsync);
+}
+
+void FdSocketConnect(ThreadState *thr, uptr pc, int fd) {
+  DPrintf("#%d: FdSocketConnect(%d)\n", thr->tid, fd);
+  if (bogusfd(fd))
+    return;
+  init(thr, pc, fd, &fdctx.socksync);
+}
+
+uptr File2addr(const char *path) {
+  (void)path;
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+uptr Dir2addr(const char *path) {
+  (void)path;
+  static u64 addr;
+  return (uptr)&addr;
+}
+
+}  //  namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_fd.h b/compiler-rt/lib/tsan/rtl-old/tsan_fd.h
new file mode 100644
index 0000000000000..d9648178481c6
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_fd.h
@@ -0,0 +1,64 @@
+//===-- tsan_fd.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// This file handles synchronization via IO.
+// People use IO for synchronization along the lines of:
+//
+// int X;
+// int client_socket;  // initialized elsewhere
+// int server_socket;  // initialized elsewhere
+//
+// Thread 1:
+// X = 42;
+// send(client_socket, ...);
+//
+// Thread 2:
+// if (recv(server_socket, ...) > 0)
+//   assert(X == 42);
+//
+// This file determines the scope of the file descriptor (pipe, socket,
+// all local files, etc) and executes acquire and release operations on
+// the scope as necessary.  Some scopes are very fine grained (e.g. pipe
+// operations synchronize only with operations on the same pipe), while
+// others are corse-grained (e.g. all operations on local files synchronize
+// with each other).
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_FD_H
+#define TSAN_FD_H
+
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+void FdInit();
+void FdAcquire(ThreadState *thr, uptr pc, int fd);
+void FdRelease(ThreadState *thr, uptr pc, int fd);
+void FdAccess(ThreadState *thr, uptr pc, int fd);
+void FdClose(ThreadState *thr, uptr pc, int fd, bool write = true);
+void FdFileCreate(ThreadState *thr, uptr pc, int fd);
+void FdDup(ThreadState *thr, uptr pc, int oldfd, int newfd, bool write);
+void FdPipeCreate(ThreadState *thr, uptr pc, int rfd, int wfd);
+void FdEventCreate(ThreadState *thr, uptr pc, int fd);
+void FdSignalCreate(ThreadState *thr, uptr pc, int fd);
+void FdInotifyCreate(ThreadState *thr, uptr pc, int fd);
+void FdPollCreate(ThreadState *thr, uptr pc, int fd);
+void FdSocketCreate(ThreadState *thr, uptr pc, int fd);
+void FdSocketAccept(ThreadState *thr, uptr pc, int fd, int newfd);
+void FdSocketConnecting(ThreadState *thr, uptr pc, int fd);
+void FdSocketConnect(ThreadState *thr, uptr pc, int fd);
+bool FdLocation(uptr addr, int *fd, Tid *tid, StackID *stack);
+void FdOnFork(ThreadState *thr, uptr pc);
+
+uptr File2addr(const char *path);
+uptr Dir2addr(const char *path);
+
+}  // namespace __tsan
+
+#endif  // TSAN_INTERFACE_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_flags.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_flags.cpp
new file mode 100644
index 0000000000000..ee89862d17bd8
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_flags.cpp
@@ -0,0 +1,126 @@
+//===-- tsan_flags.cpp ----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_flag_parser.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "tsan_flags.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "ubsan/ubsan_flags.h"
+
+namespace __tsan {
+
+// Can be overriden in frontend.
+#ifdef TSAN_EXTERNAL_HOOKS
+extern "C" const char* __tsan_default_options();
+#else
+SANITIZER_WEAK_DEFAULT_IMPL
+const char *__tsan_default_options() {
+  return "";
+}
+#endif
+
+void Flags::SetDefaults() {
+#define TSAN_FLAG(Type, Name, DefaultValue, Description) Name = DefaultValue;
+#include "tsan_flags.inc"
+#undef TSAN_FLAG
+  // DDFlags
+  second_deadlock_stack = false;
+}
+
+void RegisterTsanFlags(FlagParser *parser, Flags *f) {
+#define TSAN_FLAG(Type, Name, DefaultValue, Description) \
+  RegisterFlag(parser, #Name, Description, &f->Name);
+#include "tsan_flags.inc"
+#undef TSAN_FLAG
+  // DDFlags
+  RegisterFlag(parser, "second_deadlock_stack",
+      "Report where each mutex is locked in deadlock reports",
+      &f->second_deadlock_stack);
+}
+
+void InitializeFlags(Flags *f, const char *env, const char *env_option_name) {
+  SetCommonFlagsDefaults();
+  {
+    // Override some common flags defaults.
+    CommonFlags cf;
+    cf.CopyFrom(*common_flags());
+    cf.external_symbolizer_path = GetEnv("TSAN_SYMBOLIZER_PATH");
+    cf.allow_addr2line = true;
+    if (SANITIZER_GO) {
+      // Does not work as expected for Go: runtime handles SIGABRT and crashes.
+      cf.abort_on_error = false;
+      // Go does not have mutexes.
+      cf.detect_deadlocks = false;
+    }
+    cf.print_suppressions = false;
+    cf.stack_trace_format = "    #%n %f %S %M";
+    cf.exitcode = 66;
+    cf.intercept_tls_get_addr = true;
+    OverrideCommonFlags(cf);
+  }
+
+  f->SetDefaults();
+
+  FlagParser parser;
+  RegisterTsanFlags(&parser, f);
+  RegisterCommonFlags(&parser);
+
+#if TSAN_CONTAINS_UBSAN
+  __ubsan::Flags *uf = __ubsan::flags();
+  uf->SetDefaults();
+
+  FlagParser ubsan_parser;
+  __ubsan::RegisterUbsanFlags(&ubsan_parser, uf);
+  RegisterCommonFlags(&ubsan_parser);
+#endif
+
+  // Let a frontend override.
+  parser.ParseString(__tsan_default_options());
+#if TSAN_CONTAINS_UBSAN
+  const char *ubsan_default_options = __ubsan_default_options();
+  ubsan_parser.ParseString(ubsan_default_options);
+#endif
+  // Override from command line.
+  parser.ParseString(env, env_option_name);
+#if TSAN_CONTAINS_UBSAN
+  ubsan_parser.ParseStringFromEnv("UBSAN_OPTIONS");
+#endif
+
+  // Sanity check.
+  if (!f->report_bugs) {
+    f->report_thread_leaks = false;
+    f->report_destroy_locked = false;
+    f->report_signal_unsafe = false;
+  }
+
+  InitializeCommonFlags();
+
+  if (Verbosity()) ReportUnrecognizedFlags();
+
+  if (common_flags()->help) parser.PrintFlagDescriptions();
+
+  if (f->history_size < 0 || f->history_size > 7) {
+    Printf("ThreadSanitizer: incorrect value for history_size"
+           " (must be [0..7])\n");
+    Die();
+  }
+
+  if (f->io_sync < 0 || f->io_sync > 2) {
+    Printf("ThreadSanitizer: incorrect value for io_sync"
+           " (must be [0..2])\n");
+    Die();
+  }
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_flags.h b/compiler-rt/lib/tsan/rtl-old/tsan_flags.h
new file mode 100644
index 0000000000000..da27d5b992bcb
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_flags.h
@@ -0,0 +1,34 @@
+//===-- tsan_flags.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+// NOTE: This file may be included into user code.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_FLAGS_H
+#define TSAN_FLAGS_H
+
+#include "sanitizer_common/sanitizer_flags.h"
+#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
+
+namespace __tsan {
+
+struct Flags : DDFlags {
+#define TSAN_FLAG(Type, Name, DefaultValue, Description) Type Name;
+#include "tsan_flags.inc"
+#undef TSAN_FLAG
+
+  void SetDefaults();
+  void ParseFromString(const char *str);
+};
+
+void InitializeFlags(Flags *flags, const char *env,
+                     const char *env_option_name = nullptr);
+}  // namespace __tsan
+
+#endif  // TSAN_FLAGS_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc b/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc
new file mode 100644
index 0000000000000..7954a4307fa1e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_flags.inc
@@ -0,0 +1,84 @@
+//===-- tsan_flags.inc ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// TSan runtime flags.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_FLAG
+# error "Define TSAN_FLAG prior to including this file!"
+#endif
+
+// TSAN_FLAG(Type, Name, DefaultValue, Description)
+// See COMMON_FLAG in sanitizer_flags.inc for more details.
+
+TSAN_FLAG(bool, enable_annotations, true,
+          "Enable dynamic annotations, otherwise they are no-ops.")
+// Suppress a race report if we've already output another race report
+// with the same stack.
+TSAN_FLAG(bool, suppress_equal_stacks, true,
+          "Suppress a race report if we've already output another race report "
+          "with the same stack.")
+TSAN_FLAG(bool, suppress_equal_addresses, true,
+          "Suppress a race report if we've already output another race report "
+          "on the same address.")
+
+TSAN_FLAG(bool, report_bugs, true,
+          "Turns off bug reporting entirely (useful for benchmarking).")
+TSAN_FLAG(bool, report_thread_leaks, true, "Report thread leaks at exit?")
+TSAN_FLAG(bool, report_destroy_locked, true,
+          "Report destruction of a locked mutex?")
+TSAN_FLAG(bool, report_mutex_bugs, true,
+          "Report incorrect usages of mutexes and mutex annotations?")
+TSAN_FLAG(bool, report_signal_unsafe, true,
+          "Report violations of async signal-safety "
+          "(e.g. malloc() call from a signal handler).")
+TSAN_FLAG(bool, report_atomic_races, true,
+          "Report races between atomic and plain memory accesses.")
+TSAN_FLAG(
+    bool, force_seq_cst_atomics, false,
+    "If set, all atomics are effectively sequentially consistent (seq_cst), "
+    "regardless of what user actually specified.")
+TSAN_FLAG(bool, halt_on_error, false, "Exit after first reported error.")
+TSAN_FLAG(int, atexit_sleep_ms, 1000,
+          "Sleep in main thread before exiting for that many ms "
+          "(useful to catch \"at exit\" races).")
+TSAN_FLAG(const char *, profile_memory, "",
+          "If set, periodically write memory profile to that file.")
+TSAN_FLAG(int, flush_memory_ms, 0, "Flush shadow memory every X ms.")
+TSAN_FLAG(int, flush_symbolizer_ms, 5000, "Flush symbolizer caches every X ms.")
+TSAN_FLAG(
+    int, memory_limit_mb, 0,
+    "Resident memory limit in MB to aim at."
+    "If the process consumes more memory, then TSan will flush shadow memory.")
+TSAN_FLAG(bool, stop_on_start, false,
+          "Stops on start until __tsan_resume() is called (for debugging).")
+TSAN_FLAG(bool, running_on_valgrind, false,
+          "Controls whether RunningOnValgrind() returns true or false.")
+// There are a lot of goroutines in Go, so we use smaller history.
+TSAN_FLAG(
+    int, history_size, SANITIZER_GO ? 1 : 3,
+    "Per-thread history size, controls how many previous memory accesses "
+    "are remembered per thread.  Possible values are [0..7]. "
+    "history_size=0 amounts to 32K memory accesses.  Each next value doubles "
+    "the amount of memory accesses, up to history_size=7 that amounts to "
+    "4M memory accesses.  The default value is 2 (128K memory accesses).")
+TSAN_FLAG(int, io_sync, 1,
+          "Controls level of synchronization implied by IO operations. "
+          "0 - no synchronization "
+          "1 - reasonable level of synchronization (write->read)"
+          "2 - global synchronization of all IO operations.")
+TSAN_FLAG(bool, die_after_fork, true,
+          "Die after multi-threaded fork if the child creates new threads.")
+TSAN_FLAG(const char *, suppressions, "", "Suppressions file name.")
+TSAN_FLAG(bool, ignore_interceptors_accesses, SANITIZER_MAC ? true : false,
+          "Ignore reads and writes from all interceptors.")
+TSAN_FLAG(bool, ignore_noninstrumented_modules, SANITIZER_MAC ? true : false,
+          "Interceptors should only detect races when called from instrumented "
+          "modules.")
+TSAN_FLAG(bool, shared_ptr_interceptor, true,
+          "Track atomic reference counting in libc++ shared_ptr and weak_ptr.")

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.cpp
new file mode 100644
index 0000000000000..1fca1cf4f9fcf
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.cpp
@@ -0,0 +1,38 @@
+//===-- tsan_ignoreset.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_ignoreset.h"
+
+namespace __tsan {
+
+const uptr IgnoreSet::kMaxSize;
+
+IgnoreSet::IgnoreSet()
+    : size_() {
+}
+
+void IgnoreSet::Add(StackID stack_id) {
+  if (size_ == kMaxSize)
+    return;
+  for (uptr i = 0; i < size_; i++) {
+    if (stacks_[i] == stack_id)
+      return;
+  }
+  stacks_[size_++] = stack_id;
+}
+
+StackID IgnoreSet::At(uptr i) const {
+  CHECK_LT(i, size_);
+  CHECK_LE(size_, kMaxSize);
+  return stacks_[i];
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h b/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h
new file mode 100644
index 0000000000000..4e2511291ce4d
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_ignoreset.h
@@ -0,0 +1,36 @@
+//===-- tsan_ignoreset.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// IgnoreSet holds a set of stack traces where ignores were enabled.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_IGNORESET_H
+#define TSAN_IGNORESET_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+class IgnoreSet {
+ public:
+  IgnoreSet();
+  void Add(StackID stack_id);
+  void Reset() { size_ = 0; }
+  uptr Size() const { return size_; }
+  StackID At(uptr i) const;
+
+ private:
+  static constexpr uptr kMaxSize = 16;
+  uptr size_;
+  StackID stacks_[kMaxSize];
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_IGNORESET_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_ilist.h b/compiler-rt/lib/tsan/rtl-old/tsan_ilist.h
new file mode 100644
index 0000000000000..d7d8be219dbe5
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_ilist.h
@@ -0,0 +1,189 @@
+//===-- tsan_ilist.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_ILIST_H
+#define TSAN_ILIST_H
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+
+namespace __tsan {
+
+class INode {
+ public:
+  INode() = default;
+
+ private:
+  INode* next_ = nullptr;
+  INode* prev_ = nullptr;
+
+  template <typename Base, INode Base::*Node, typename Elem>
+  friend class IList;
+  INode(const INode&) = delete;
+  void operator=(const INode&) = delete;
+};
+
+// Intrusive doubly-linked list.
+//
+// The node class (MyNode) needs to include "INode foo" field,
+// then the list can be declared as IList<MyNode, &MyNode::foo>.
+// This design allows to link MyNode into multiple lists using
+// 
diff erent INode fields.
+// The optional Elem template argument allows to specify node MDT
+// (most derived type) if it's 
diff erent from MyNode.
+template <typename Base, INode Base::*Node, typename Elem = Base>
+class IList {
+ public:
+  IList();
+
+  void PushFront(Elem* e);
+  void PushBack(Elem* e);
+  void Remove(Elem* e);
+
+  Elem* PopFront();
+  Elem* PopBack();
+  Elem* Front();
+  Elem* Back();
+
+  // Prev links point towards front of the queue.
+  Elem* Prev(Elem* e);
+  // Next links point towards back of the queue.
+  Elem* Next(Elem* e);
+
+  uptr Size() const;
+  bool Empty() const;
+  bool Queued(Elem* e) const;
+
+ private:
+  INode node_;
+  uptr size_ = 0;
+
+  void Push(Elem* e, INode* after);
+  static INode* ToNode(Elem* e);
+  static Elem* ToElem(INode* n);
+
+  IList(const IList&) = delete;
+  void operator=(const IList&) = delete;
+};
+
+template <typename Base, INode Base::*Node, typename Elem>
+IList<Base, Node, Elem>::IList() {
+  node_.next_ = node_.prev_ = &node_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::PushFront(Elem* e) {
+  Push(e, &node_);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::PushBack(Elem* e) {
+  Push(e, node_.prev_);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::Push(Elem* e, INode* after) {
+  INode* n = ToNode(e);
+  DCHECK_EQ(n->next_, nullptr);
+  DCHECK_EQ(n->prev_, nullptr);
+  INode* next = after->next_;
+  n->next_ = next;
+  n->prev_ = after;
+  next->prev_ = n;
+  after->next_ = n;
+  size_++;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+void IList<Base, Node, Elem>::Remove(Elem* e) {
+  INode* n = ToNode(e);
+  INode* next = n->next_;
+  INode* prev = n->prev_;
+  DCHECK(next);
+  DCHECK(prev);
+  DCHECK(size_);
+  next->prev_ = prev;
+  prev->next_ = next;
+  n->prev_ = n->next_ = nullptr;
+  size_--;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::PopFront() {
+  Elem* e = Front();
+  if (e)
+    Remove(e);
+  return e;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::PopBack() {
+  Elem* e = Back();
+  if (e)
+    Remove(e);
+  return e;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Front() {
+  return size_ ? ToElem(node_.next_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Back() {
+  return size_ ? ToElem(node_.prev_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Prev(Elem* e) {
+  INode* n = ToNode(e);
+  DCHECK(n->prev_);
+  return n->prev_ != &node_ ? ToElem(n->prev_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::Next(Elem* e) {
+  INode* n = ToNode(e);
+  DCHECK(n->next_);
+  return n->next_ != &node_ ? ToElem(n->next_) : nullptr;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+uptr IList<Base, Node, Elem>::Size() const {
+  return size_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+bool IList<Base, Node, Elem>::Empty() const {
+  return size_ == 0;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+bool IList<Base, Node, Elem>::Queued(Elem* e) const {
+  INode* n = ToNode(e);
+  DCHECK_EQ(!n->next_, !n->prev_);
+  return n->next_;
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+INode* IList<Base, Node, Elem>::ToNode(Elem* e) {
+  return &(e->*Node);
+}
+
+template <typename Base, INode Base::*Node, typename Elem>
+Elem* IList<Base, Node, Elem>::ToElem(INode* n) {
+  return static_cast<Elem*>(reinterpret_cast<Base*>(
+      reinterpret_cast<uptr>(n) -
+      reinterpret_cast<uptr>(&(reinterpret_cast<Elem*>(0)->*Node))));
+}
+
+}  // namespace __tsan
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors.h b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors.h
new file mode 100644
index 0000000000000..61dbb81ffec43
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors.h
@@ -0,0 +1,93 @@
+#ifndef TSAN_INTERCEPTORS_H
+#define TSAN_INTERCEPTORS_H
+
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+class ScopedInterceptor {
+ public:
+  ScopedInterceptor(ThreadState *thr, const char *fname, uptr pc);
+  ~ScopedInterceptor();
+  void DisableIgnores() {
+    if (UNLIKELY(ignoring_))
+      DisableIgnoresImpl();
+  }
+  void EnableIgnores() {
+    if (UNLIKELY(ignoring_))
+      EnableIgnoresImpl();
+  }
+
+ private:
+  ThreadState *const thr_;
+  bool in_ignored_lib_;
+  bool ignoring_;
+
+  void DisableIgnoresImpl();
+  void EnableIgnoresImpl();
+};
+
+LibIgnore *libignore();
+
+#if !SANITIZER_GO
+inline bool in_symbolizer() {
+  return UNLIKELY(cur_thread_init()->in_symbolizer);
+}
+#endif
+
+}  // namespace __tsan
+
+#define SCOPED_INTERCEPTOR_RAW(func, ...)            \
+  ThreadState *thr = cur_thread_init();              \
+  ScopedInterceptor si(thr, #func, GET_CALLER_PC()); \
+  UNUSED const uptr pc = GET_CURRENT_PC();
+
+#ifdef __powerpc64__
+// Debugging of crashes on powerpc after commit:
+// c80604f7a3 ("tsan: remove real func check from interceptors")
+// Somehow replacing if with DCHECK leads to strange failures in:
+// SanitizerCommon-tsan-powerpc64le-Linux :: Linux/ptrace.cpp
+// https://lab.llvm.org/buildbot/#/builders/105
+// https://lab.llvm.org/buildbot/#/builders/121
+// https://lab.llvm.org/buildbot/#/builders/57
+#  define CHECK_REAL_FUNC(func)                                          \
+    if (REAL(func) == 0) {                                               \
+      Report("FATAL: ThreadSanitizer: failed to intercept %s\n", #func); \
+      Die();                                                             \
+    }
+#else
+#  define CHECK_REAL_FUNC(func) DCHECK(REAL(func))
+#endif
+
+#define SCOPED_TSAN_INTERCEPTOR(func, ...)                                \
+  SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__);                              \
+  CHECK_REAL_FUNC(func);                                                  \
+  if (!thr->is_inited || thr->ignore_interceptors || thr->in_ignored_lib) \
+    return REAL(func)(__VA_ARGS__);
+
+#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START() \
+    si.DisableIgnores();
+
+#define SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END() \
+    si.EnableIgnores();
+
+#define TSAN_INTERCEPTOR(ret, func, ...) INTERCEPTOR(ret, func, __VA_ARGS__)
+
+#if SANITIZER_NETBSD
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS(ret, func, ...) \
+  TSAN_INTERCEPTOR(ret, __libc_##func, __VA_ARGS__) \
+  ALIAS(WRAPPER_NAME(pthread_##func));
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS_THR(ret, func, ...) \
+  TSAN_INTERCEPTOR(ret, __libc_thr_##func, __VA_ARGS__) \
+  ALIAS(WRAPPER_NAME(pthread_##func));
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS_THR2(ret, func, func2, ...) \
+  TSAN_INTERCEPTOR(ret, __libc_thr_##func, __VA_ARGS__) \
+  ALIAS(WRAPPER_NAME(pthread_##func2));
+#else
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS(ret, func, ...)
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS_THR(ret, func, ...)
+# define TSAN_INTERCEPTOR_NETBSD_ALIAS_THR2(ret, func, func2, ...)
+#endif
+
+#endif  // TSAN_INTERCEPTORS_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp
new file mode 100644
index 0000000000000..cbbb7ecb2397e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_libdispatch.cpp
@@ -0,0 +1,814 @@
+//===-- tsan_interceptors_libdispatch.cpp ---------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Support for intercepting libdispatch (GCD).
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+#include "tsan_rtl.h"
+
+#include "BlocksRuntime/Block.h"
+#include "tsan_dispatch_defs.h"
+
+#if SANITIZER_MAC
+# include <Availability.h>
+#endif
+
+namespace __tsan {
+  typedef u16 uint16_t;
+
+typedef struct {
+  dispatch_queue_t queue;
+  void *orig_context;
+  dispatch_function_t orig_work;
+  bool free_context_in_callback;
+  bool submitted_synchronously;
+  bool is_barrier_block;
+  uptr non_queue_sync_object;
+} block_context_t;
+
+// The offsets of 
diff erent fields of the dispatch_queue_t structure, exported
+// by libdispatch.dylib.
+extern "C" struct dispatch_queue_offsets_s {
+  const uint16_t dqo_version;
+  const uint16_t dqo_label;
+  const uint16_t dqo_label_size;
+  const uint16_t dqo_flags;
+  const uint16_t dqo_flags_size;
+  const uint16_t dqo_serialnum;
+  const uint16_t dqo_serialnum_size;
+  const uint16_t dqo_width;
+  const uint16_t dqo_width_size;
+  const uint16_t dqo_running;
+  const uint16_t dqo_running_size;
+  const uint16_t dqo_suspend_cnt;
+  const uint16_t dqo_suspend_cnt_size;
+  const uint16_t dqo_target_queue;
+  const uint16_t dqo_target_queue_size;
+  const uint16_t dqo_priority;
+  const uint16_t dqo_priority_size;
+} dispatch_queue_offsets;
+
+static bool IsQueueSerial(dispatch_queue_t q) {
+  CHECK_EQ(dispatch_queue_offsets.dqo_width_size, 2);
+  uptr width = *(uint16_t *)(((uptr)q) + dispatch_queue_offsets.dqo_width);
+  CHECK_NE(width, 0);
+  return width == 1;
+}
+
+static dispatch_queue_t GetTargetQueueFromQueue(dispatch_queue_t q) {
+  CHECK_EQ(dispatch_queue_offsets.dqo_target_queue_size, 8);
+  dispatch_queue_t tq = *(
+      dispatch_queue_t *)(((uptr)q) + dispatch_queue_offsets.dqo_target_queue);
+  return tq;
+}
+
+static dispatch_queue_t GetTargetQueueFromSource(dispatch_source_t source) {
+  dispatch_queue_t tq = GetTargetQueueFromQueue((dispatch_queue_t)source);
+  CHECK_NE(tq, 0);
+  return tq;
+}
+
+static block_context_t *AllocContext(ThreadState *thr, uptr pc,
+                                     dispatch_queue_t queue, void *orig_context,
+                                     dispatch_function_t orig_work) {
+  block_context_t *new_context =
+      (block_context_t *)user_alloc_internal(thr, pc, sizeof(block_context_t));
+  new_context->queue = queue;
+  new_context->orig_context = orig_context;
+  new_context->orig_work = orig_work;
+  new_context->free_context_in_callback = true;
+  new_context->submitted_synchronously = false;
+  new_context->is_barrier_block = false;
+  new_context->non_queue_sync_object = 0;
+  return new_context;
+}
+
+#define GET_QUEUE_SYNC_VARS(context, q)                                  \
+  bool is_queue_serial = q && IsQueueSerial(q);                          \
+  uptr sync_ptr = (uptr)q ?: context->non_queue_sync_object;             \
+  uptr serial_sync = (uptr)sync_ptr;                                     \
+  uptr concurrent_sync = sync_ptr ? ((uptr)sync_ptr) + sizeof(uptr) : 0; \
+  bool serial_task = context->is_barrier_block || is_queue_serial
+
+static void dispatch_sync_pre_execute(ThreadState *thr, uptr pc,
+                                      block_context_t *context) {
+  uptr submit_sync = (uptr)context;
+  Acquire(thr, pc, submit_sync);
+
+  dispatch_queue_t q = context->queue;
+  do {
+    GET_QUEUE_SYNC_VARS(context, q);
+    if (serial_sync) Acquire(thr, pc, serial_sync);
+    if (serial_task && concurrent_sync) Acquire(thr, pc, concurrent_sync);
+
+    if (q) q = GetTargetQueueFromQueue(q);
+  } while (q);
+}
+
+static void dispatch_sync_post_execute(ThreadState *thr, uptr pc,
+                                       block_context_t *context) {
+  uptr submit_sync = (uptr)context;
+  if (context->submitted_synchronously) Release(thr, pc, submit_sync);
+
+  dispatch_queue_t q = context->queue;
+  do {
+    GET_QUEUE_SYNC_VARS(context, q);
+    if (serial_task && serial_sync) Release(thr, pc, serial_sync);
+    if (!serial_task && concurrent_sync) Release(thr, pc, concurrent_sync);
+
+    if (q) q = GetTargetQueueFromQueue(q);
+  } while (q);
+}
+
+static void dispatch_callback_wrap(void *param) {
+  SCOPED_INTERCEPTOR_RAW(dispatch_callback_wrap);
+  block_context_t *context = (block_context_t *)param;
+
+  dispatch_sync_pre_execute(thr, pc, context);
+
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  context->orig_work(context->orig_context);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+
+  dispatch_sync_post_execute(thr, pc, context);
+
+  if (context->free_context_in_callback) user_free(thr, pc, context);
+}
+
+static void invoke_block(void *param) {
+  dispatch_block_t block = (dispatch_block_t)param;
+  block();
+}
+
+static void invoke_and_release_block(void *param) {
+  dispatch_block_t block = (dispatch_block_t)param;
+  block();
+  Block_release(block);
+}
+
+#define DISPATCH_INTERCEPT_ASYNC_B(name, barrier)                            \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, dispatch_block_t block) { \
+    SCOPED_TSAN_INTERCEPTOR(name, q, block);                                 \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
+    dispatch_block_t heap_block = Block_copy(block);                         \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                             \
+    block_context_t *new_context =                                           \
+        AllocContext(thr, pc, q, heap_block, &invoke_and_release_block);     \
+    new_context->is_barrier_block = barrier;                                 \
+    Release(thr, pc, (uptr)new_context);                                     \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
+    REAL(name##_f)(q, new_context, dispatch_callback_wrap);                  \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                             \
+  }
+
+#define DISPATCH_INTERCEPT_SYNC_B(name, barrier)                             \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q,                           \
+                   DISPATCH_NOESCAPE dispatch_block_t block) {               \
+    SCOPED_TSAN_INTERCEPTOR(name, q, block);                                 \
+    block_context_t new_context = {                                          \
+        q, block, &invoke_block, false, true, barrier, 0};                   \
+    Release(thr, pc, (uptr)&new_context);                                    \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                           \
+    REAL(name##_f)(q, &new_context, dispatch_callback_wrap);                 \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                             \
+    Acquire(thr, pc, (uptr)&new_context);                                    \
+  }
+
+#define DISPATCH_INTERCEPT_ASYNC_F(name, barrier)                 \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, void *context, \
+                   dispatch_function_t work) {                    \
+    SCOPED_TSAN_INTERCEPTOR(name, q, context, work);              \
+    block_context_t *new_context =                                \
+        AllocContext(thr, pc, q, context, work);                  \
+    new_context->is_barrier_block = barrier;                      \
+    Release(thr, pc, (uptr)new_context);                          \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                \
+    REAL(name)(q, new_context, dispatch_callback_wrap);           \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                  \
+  }
+
+#define DISPATCH_INTERCEPT_SYNC_F(name, barrier)                              \
+  TSAN_INTERCEPTOR(void, name, dispatch_queue_t q, void *context,             \
+                   dispatch_function_t work) {                                \
+    SCOPED_TSAN_INTERCEPTOR(name, q, context, work);                          \
+    block_context_t new_context = {                                           \
+        q, context, work, false, true, barrier, 0};                           \
+    Release(thr, pc, (uptr)&new_context);                                     \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();                            \
+    REAL(name)(q, &new_context, dispatch_callback_wrap);                      \
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();                              \
+    Acquire(thr, pc, (uptr)&new_context);                                     \
+  }
+
+#define DISPATCH_INTERCEPT(name, barrier)             \
+  DISPATCH_INTERCEPT_ASYNC_F(name##_async_f, barrier) \
+  DISPATCH_INTERCEPT_ASYNC_B(name##_async, barrier)   \
+  DISPATCH_INTERCEPT_SYNC_F(name##_sync_f, barrier)   \
+  DISPATCH_INTERCEPT_SYNC_B(name##_sync, barrier)
+
+// We wrap dispatch_async, dispatch_sync and friends where we allocate a new
+// context, which is used to synchronize (we release the context before
+// submitting, and the callback acquires it before executing the original
+// callback).
+DISPATCH_INTERCEPT(dispatch, false)
+DISPATCH_INTERCEPT(dispatch_barrier, true)
+
+// dispatch_async_and_wait() and friends were introduced in macOS 10.14.
+// Linking of these interceptors fails when using an older SDK.
+#if !SANITIZER_MAC || defined(__MAC_10_14)
+// macOS 10.14 is greater than our minimal deployment target.  To ensure we
+// generate a weak reference so the TSan dylib continues to work on older
+// systems, we need to forward declare the intercepted functions as "weak
+// imports".   Note that this file is multi-platform, so we cannot include the
+// actual header file (#include <dispatch/dispatch.h>).
+SANITIZER_WEAK_IMPORT void dispatch_async_and_wait(
+    dispatch_queue_t queue, DISPATCH_NOESCAPE dispatch_block_t block);
+SANITIZER_WEAK_IMPORT void dispatch_async_and_wait_f(
+    dispatch_queue_t queue, void *context, dispatch_function_t work);
+SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait(
+    dispatch_queue_t queue, DISPATCH_NOESCAPE dispatch_block_t block);
+SANITIZER_WEAK_IMPORT void dispatch_barrier_async_and_wait_f(
+    dispatch_queue_t queue, void *context, dispatch_function_t work);
+
+DISPATCH_INTERCEPT_SYNC_F(dispatch_async_and_wait_f, false)
+DISPATCH_INTERCEPT_SYNC_B(dispatch_async_and_wait, false)
+DISPATCH_INTERCEPT_SYNC_F(dispatch_barrier_async_and_wait_f, true)
+DISPATCH_INTERCEPT_SYNC_B(dispatch_barrier_async_and_wait, true)
+#endif
+
+
+DECLARE_REAL(void, dispatch_after_f, dispatch_time_t when,
+             dispatch_queue_t queue, void *context, dispatch_function_t work)
+
+TSAN_INTERCEPTOR(void, dispatch_after, dispatch_time_t when,
+                 dispatch_queue_t queue, dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_after, when, queue, block);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  dispatch_block_t heap_block = Block_copy(block);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  block_context_t *new_context =
+      AllocContext(thr, pc, queue, heap_block, &invoke_and_release_block);
+  Release(thr, pc, (uptr)new_context);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  REAL(dispatch_after_f)(when, queue, new_context, dispatch_callback_wrap);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+}
+
+TSAN_INTERCEPTOR(void, dispatch_after_f, dispatch_time_t when,
+                 dispatch_queue_t queue, void *context,
+                 dispatch_function_t work) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_after_f, when, queue, context, work);
+  WRAP(dispatch_after)(when, queue, ^(void) {
+    work(context);
+  });
+}
+
+// GCD's dispatch_once implementation has a fast path that contains a racy read
+// and it's inlined into user's code. Furthermore, this fast path doesn't
+// establish a proper happens-before relations between the initialization and
+// code following the call to dispatch_once. We could deal with this in
+// instrumented code, but there's not much we can do about it in system
+// libraries. Let's disable the fast path (by never storing the value ~0 to
+// predicate), so the interceptor is always called, and let's add proper release
+// and acquire semantics. Since TSan does not see its own atomic stores, the
+// race on predicate won't be reported - the only accesses to it that TSan sees
+// are the loads on the fast path. Loads don't race. Secondly, dispatch_once is
+// both a macro and a real function, we want to intercept the function, so we
+// need to undefine the macro.
+#undef dispatch_once
+TSAN_INTERCEPTOR(void, dispatch_once, dispatch_once_t *predicate,
+                 DISPATCH_NOESCAPE dispatch_block_t block) {
+  SCOPED_INTERCEPTOR_RAW(dispatch_once, predicate, block);
+  atomic_uint32_t *a = reinterpret_cast<atomic_uint32_t *>(predicate);
+  u32 v = atomic_load(a, memory_order_acquire);
+  if (v == 0 &&
+      atomic_compare_exchange_strong(a, &v, 1, memory_order_relaxed)) {
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+    block();
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+    Release(thr, pc, (uptr)a);
+    atomic_store(a, 2, memory_order_release);
+  } else {
+    while (v != 2) {
+      internal_sched_yield();
+      v = atomic_load(a, memory_order_acquire);
+    }
+    Acquire(thr, pc, (uptr)a);
+  }
+}
+
+#undef dispatch_once_f
+TSAN_INTERCEPTOR(void, dispatch_once_f, dispatch_once_t *predicate,
+                 void *context, dispatch_function_t function) {
+  SCOPED_INTERCEPTOR_RAW(dispatch_once_f, predicate, context, function);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  WRAP(dispatch_once)(predicate, ^(void) {
+    function(context);
+  });
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_semaphore_signal,
+                 dispatch_semaphore_t dsema) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_semaphore_signal, dsema);
+  Release(thr, pc, (uptr)dsema);
+  return REAL(dispatch_semaphore_signal)(dsema);
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_semaphore_wait, dispatch_semaphore_t dsema,
+                 dispatch_time_t timeout) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_semaphore_wait, dsema, timeout);
+  long_t result = REAL(dispatch_semaphore_wait)(dsema, timeout);
+  if (result == 0) Acquire(thr, pc, (uptr)dsema);
+  return result;
+}
+
+TSAN_INTERCEPTOR(long_t, dispatch_group_wait, dispatch_group_t group,
+                 dispatch_time_t timeout) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_wait, group, timeout);
+  long_t result = REAL(dispatch_group_wait)(group, timeout);
+  if (result == 0) Acquire(thr, pc, (uptr)group);
+  return result;
+}
+
+// Used, but not intercepted.
+extern "C" void dispatch_group_enter(dispatch_group_t group);
+
+TSAN_INTERCEPTOR(void, dispatch_group_leave, dispatch_group_t group) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_leave, group);
+  // Acquired in the group notification callback in dispatch_group_notify[_f].
+  Release(thr, pc, (uptr)group);
+  REAL(dispatch_group_leave)(group);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_async, dispatch_group_t group,
+                 dispatch_queue_t queue, dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_async, group, queue, block);
+  dispatch_retain(group);
+  dispatch_group_enter(group);
+  __block dispatch_block_t block_copy = (dispatch_block_t)Block_copy(block);
+  WRAP(dispatch_async)(queue, ^(void) {
+    block_copy();
+    Block_release(block_copy);
+    WRAP(dispatch_group_leave)(group);
+    dispatch_release(group);
+  });
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_async_f, dispatch_group_t group,
+                 dispatch_queue_t queue, void *context,
+                 dispatch_function_t work) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_async_f, group, queue, context, work);
+  dispatch_retain(group);
+  dispatch_group_enter(group);
+  WRAP(dispatch_async)(queue, ^(void) {
+    work(context);
+    WRAP(dispatch_group_leave)(group);
+    dispatch_release(group);
+  });
+}
+
+DECLARE_REAL(void, dispatch_group_notify_f, dispatch_group_t group,
+             dispatch_queue_t q, void *context, dispatch_function_t work)
+
+TSAN_INTERCEPTOR(void, dispatch_group_notify, dispatch_group_t group,
+                 dispatch_queue_t q, dispatch_block_t block) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_group_notify, group, q, block);
+
+  // To make sure the group is still available in the callback (otherwise
+  // it can be already destroyed).  Will be released in the callback.
+  dispatch_retain(group);
+
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  dispatch_block_t heap_block = Block_copy(^(void) {
+    {
+      SCOPED_INTERCEPTOR_RAW(dispatch_read_callback);
+      // Released when leaving the group (dispatch_group_leave).
+      Acquire(thr, pc, (uptr)group);
+    }
+    dispatch_release(group);
+    block();
+  });
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  block_context_t *new_context =
+      AllocContext(thr, pc, q, heap_block, &invoke_and_release_block);
+  new_context->is_barrier_block = true;
+  Release(thr, pc, (uptr)new_context);
+  REAL(dispatch_group_notify_f)(group, q, new_context, dispatch_callback_wrap);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_group_notify_f, dispatch_group_t group,
+                 dispatch_queue_t q, void *context, dispatch_function_t work) {
+  WRAP(dispatch_group_notify)(group, q, ^(void) { work(context); });
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_event_handler,
+                 dispatch_source_t source, dispatch_block_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_event_handler, source, handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_event_handler)(source, nullptr);
+  dispatch_queue_t q = GetTargetQueueFromSource(source);
+  __block block_context_t new_context = {
+      q, handler, &invoke_block, false, false, false, 0 };
+  dispatch_block_t new_handler = Block_copy(^(void) {
+    new_context.orig_context = handler;  // To explicitly capture "handler".
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_source_set_event_handler)(source, new_handler);
+  Block_release(new_handler);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_event_handler_f,
+                 dispatch_source_t source, dispatch_function_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_event_handler_f, source, handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_event_handler)(source, nullptr);
+  dispatch_block_t block = ^(void) {
+    handler(dispatch_get_context(source));
+  };
+  WRAP(dispatch_source_set_event_handler)(source, block);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_cancel_handler,
+                 dispatch_source_t source, dispatch_block_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_cancel_handler, source, handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_cancel_handler)(source, nullptr);
+  dispatch_queue_t q = GetTargetQueueFromSource(source);
+  __block block_context_t new_context = {
+      q, handler, &invoke_block, false, false, false, 0};
+  dispatch_block_t new_handler = Block_copy(^(void) {
+    new_context.orig_context = handler;  // To explicitly capture "handler".
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_source_set_cancel_handler)(source, new_handler);
+  Block_release(new_handler);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_cancel_handler_f,
+                 dispatch_source_t source, dispatch_function_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_cancel_handler_f, source,
+                          handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_cancel_handler)(source, nullptr);
+  dispatch_block_t block = ^(void) {
+    handler(dispatch_get_context(source));
+  };
+  WRAP(dispatch_source_set_cancel_handler)(source, block);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_registration_handler,
+                 dispatch_source_t source, dispatch_block_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_registration_handler, source,
+                          handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_registration_handler)(source, nullptr);
+  dispatch_queue_t q = GetTargetQueueFromSource(source);
+  __block block_context_t new_context = {
+      q, handler, &invoke_block, false, false, false, 0};
+  dispatch_block_t new_handler = Block_copy(^(void) {
+    new_context.orig_context = handler;  // To explicitly capture "handler".
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_source_set_registration_handler)(source, new_handler);
+  Block_release(new_handler);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_source_set_registration_handler_f,
+                 dispatch_source_t source, dispatch_function_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_source_set_registration_handler_f, source,
+                          handler);
+  if (handler == nullptr)
+    return REAL(dispatch_source_set_registration_handler)(source, nullptr);
+  dispatch_block_t block = ^(void) {
+    handler(dispatch_get_context(source));
+  };
+  WRAP(dispatch_source_set_registration_handler)(source, block);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_apply, size_t iterations,
+                 dispatch_queue_t queue,
+                 DISPATCH_NOESCAPE void (^block)(size_t)) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_apply, iterations, queue, block);
+
+  u8 sync1, sync2;
+  uptr parent_to_child_sync = (uptr)&sync1;
+  uptr child_to_parent_sync = (uptr)&sync2;
+
+  Release(thr, pc, parent_to_child_sync);
+  void (^new_block)(size_t) = ^(size_t iteration) {
+    SCOPED_INTERCEPTOR_RAW(dispatch_apply);
+    Acquire(thr, pc, parent_to_child_sync);
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+    block(iteration);
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+    Release(thr, pc, child_to_parent_sync);
+  };
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  REAL(dispatch_apply)(iterations, queue, new_block);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  Acquire(thr, pc, child_to_parent_sync);
+}
+
+static void invoke_block_iteration(void *param, size_t iteration) {
+  auto block = (void (^)(size_t)) param;
+  block(iteration);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_apply_f, size_t iterations,
+                 dispatch_queue_t queue, void *context,
+                 void (*work)(void *, size_t)) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_apply_f, iterations, queue, context, work);
+
+  // Unfortunately, we cannot delegate to dispatch_apply, since libdispatch
+  // implements dispatch_apply in terms of dispatch_apply_f.
+  u8 sync1, sync2;
+  uptr parent_to_child_sync = (uptr)&sync1;
+  uptr child_to_parent_sync = (uptr)&sync2;
+
+  Release(thr, pc, parent_to_child_sync);
+  void (^new_block)(size_t) = ^(size_t iteration) {
+    SCOPED_INTERCEPTOR_RAW(dispatch_apply_f);
+    Acquire(thr, pc, parent_to_child_sync);
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+    work(context, iteration);
+    SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+    Release(thr, pc, child_to_parent_sync);
+  };
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  REAL(dispatch_apply_f)(iterations, queue, new_block, invoke_block_iteration);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  Acquire(thr, pc, child_to_parent_sync);
+}
+
+DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+DECLARE_REAL_AND_INTERCEPTOR(int, munmap, void *addr, long_t sz)
+
+TSAN_INTERCEPTOR(dispatch_data_t, dispatch_data_create, const void *buffer,
+                 size_t size, dispatch_queue_t q, dispatch_block_t destructor) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_data_create, buffer, size, q, destructor);
+  if ((q == nullptr) || (destructor == DISPATCH_DATA_DESTRUCTOR_DEFAULT))
+    return REAL(dispatch_data_create)(buffer, size, q, destructor);
+
+  if (destructor == DISPATCH_DATA_DESTRUCTOR_FREE)
+    destructor = ^(void) { WRAP(free)((void *)(uintptr_t)buffer); };
+  else if (destructor == DISPATCH_DATA_DESTRUCTOR_MUNMAP)
+    destructor = ^(void) { WRAP(munmap)((void *)(uintptr_t)buffer, size); };
+
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START();
+  dispatch_block_t heap_block = Block_copy(destructor);
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END();
+  block_context_t *new_context =
+      AllocContext(thr, pc, q, heap_block, &invoke_and_release_block);
+  uptr submit_sync = (uptr)new_context;
+  Release(thr, pc, submit_sync);
+  return REAL(dispatch_data_create)(buffer, size, q, ^(void) {
+    dispatch_callback_wrap(new_context);
+  });
+}
+
+typedef void (^fd_handler_t)(dispatch_data_t data, int error);
+typedef void (^cleanup_handler_t)(int error);
+
+TSAN_INTERCEPTOR(void, dispatch_read, dispatch_fd_t fd, size_t length,
+                 dispatch_queue_t q, fd_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_read, fd, length, q, h);
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  fd_handler_t new_h = Block_copy(^(dispatch_data_t data, int error) {
+    new_context.orig_context = ^(void) {
+      h(data, error);
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_read)(fd, length, q, new_h);
+  Block_release(new_h);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_write, dispatch_fd_t fd, dispatch_data_t data,
+                 dispatch_queue_t q, fd_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_write, fd, data, q, h);
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  fd_handler_t new_h = Block_copy(^(dispatch_data_t data, int error) {
+    new_context.orig_context = ^(void) {
+      h(data, error);
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_write)(fd, data, q, new_h);
+  Block_release(new_h);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_io_read, dispatch_io_t channel, off_t offset,
+                 size_t length, dispatch_queue_t q, dispatch_io_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_read, channel, offset, length, q, h);
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  dispatch_io_handler_t new_h =
+      Block_copy(^(bool done, dispatch_data_t data, int error) {
+        new_context.orig_context = ^(void) {
+          h(done, data, error);
+        };
+        dispatch_callback_wrap(&new_context);
+      });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_io_read)(channel, offset, length, q, new_h);
+  Block_release(new_h);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_io_write, dispatch_io_t channel, off_t offset,
+                 dispatch_data_t data, dispatch_queue_t q,
+                 dispatch_io_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_write, channel, offset, data, q, h);
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  dispatch_io_handler_t new_h =
+      Block_copy(^(bool done, dispatch_data_t data, int error) {
+        new_context.orig_context = ^(void) {
+          h(done, data, error);
+        };
+        dispatch_callback_wrap(&new_context);
+      });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_io_write)(channel, offset, data, q, new_h);
+  Block_release(new_h);
+}
+
+TSAN_INTERCEPTOR(void, dispatch_io_barrier, dispatch_io_t channel,
+                 dispatch_block_t barrier) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_barrier, channel, barrier);
+  __block block_context_t new_context = {
+      nullptr, nullptr, &invoke_block, false, false, false, 0};
+  new_context.non_queue_sync_object = (uptr)channel;
+  new_context.is_barrier_block = true;
+  dispatch_block_t new_block = Block_copy(^(void) {
+    new_context.orig_context = ^(void) {
+      barrier();
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  REAL(dispatch_io_barrier)(channel, new_block);
+  Block_release(new_block);
+}
+
+TSAN_INTERCEPTOR(dispatch_io_t, dispatch_io_create, dispatch_io_type_t type,
+                 dispatch_fd_t fd, dispatch_queue_t q, cleanup_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_create, type, fd, q, h);
+  __block dispatch_io_t new_channel = nullptr;
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  cleanup_handler_t new_h = Block_copy(^(int error) {
+    {
+      SCOPED_INTERCEPTOR_RAW(dispatch_io_create_callback);
+      Acquire(thr, pc, (uptr)new_channel);  // Release() in dispatch_io_close.
+    }
+    new_context.orig_context = ^(void) {
+      h(error);
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  new_channel = REAL(dispatch_io_create)(type, fd, q, new_h);
+  Block_release(new_h);
+  return new_channel;
+}
+
+TSAN_INTERCEPTOR(dispatch_io_t, dispatch_io_create_with_path,
+                 dispatch_io_type_t type, const char *path, int oflag,
+                 mode_t mode, dispatch_queue_t q, cleanup_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_create_with_path, type, path, oflag, mode,
+                          q, h);
+  __block dispatch_io_t new_channel = nullptr;
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  cleanup_handler_t new_h = Block_copy(^(int error) {
+    {
+      SCOPED_INTERCEPTOR_RAW(dispatch_io_create_callback);
+      Acquire(thr, pc, (uptr)new_channel);  // Release() in dispatch_io_close.
+    }
+    new_context.orig_context = ^(void) {
+      h(error);
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  new_channel =
+      REAL(dispatch_io_create_with_path)(type, path, oflag, mode, q, new_h);
+  Block_release(new_h);
+  return new_channel;
+}
+
+TSAN_INTERCEPTOR(dispatch_io_t, dispatch_io_create_with_io,
+                 dispatch_io_type_t type, dispatch_io_t io, dispatch_queue_t q,
+                 cleanup_handler_t h) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_create_with_io, type, io, q, h);
+  __block dispatch_io_t new_channel = nullptr;
+  __block block_context_t new_context = {
+      q, nullptr, &invoke_block, false, false, false, 0};
+  cleanup_handler_t new_h = Block_copy(^(int error) {
+    {
+      SCOPED_INTERCEPTOR_RAW(dispatch_io_create_callback);
+      Acquire(thr, pc, (uptr)new_channel);  // Release() in dispatch_io_close.
+    }
+    new_context.orig_context = ^(void) {
+      h(error);
+    };
+    dispatch_callback_wrap(&new_context);
+  });
+  uptr submit_sync = (uptr)&new_context;
+  Release(thr, pc, submit_sync);
+  new_channel = REAL(dispatch_io_create_with_io)(type, io, q, new_h);
+  Block_release(new_h);
+  return new_channel;
+}
+
+TSAN_INTERCEPTOR(void, dispatch_io_close, dispatch_io_t channel,
+                 dispatch_io_close_flags_t flags) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_io_close, channel, flags);
+  Release(thr, pc, (uptr)channel);  // Acquire() in dispatch_io_create[_*].
+  return REAL(dispatch_io_close)(channel, flags);
+}
+
+// Resuming a suspended queue needs to synchronize with all subsequent
+// executions of blocks in that queue.
+TSAN_INTERCEPTOR(void, dispatch_resume, dispatch_object_t o) {
+  SCOPED_TSAN_INTERCEPTOR(dispatch_resume, o);
+  Release(thr, pc, (uptr)o);  // Synchronizes with the Acquire() on serial_sync
+                              // in dispatch_sync_pre_execute
+  return REAL(dispatch_resume)(o);
+}
+
+void InitializeLibdispatchInterceptors() {
+  INTERCEPT_FUNCTION(dispatch_async);
+  INTERCEPT_FUNCTION(dispatch_async_f);
+  INTERCEPT_FUNCTION(dispatch_sync);
+  INTERCEPT_FUNCTION(dispatch_sync_f);
+  INTERCEPT_FUNCTION(dispatch_barrier_async);
+  INTERCEPT_FUNCTION(dispatch_barrier_async_f);
+  INTERCEPT_FUNCTION(dispatch_barrier_sync);
+  INTERCEPT_FUNCTION(dispatch_barrier_sync_f);
+  INTERCEPT_FUNCTION(dispatch_async_and_wait);
+  INTERCEPT_FUNCTION(dispatch_async_and_wait_f);
+  INTERCEPT_FUNCTION(dispatch_barrier_async_and_wait);
+  INTERCEPT_FUNCTION(dispatch_barrier_async_and_wait_f);
+  INTERCEPT_FUNCTION(dispatch_after);
+  INTERCEPT_FUNCTION(dispatch_after_f);
+  INTERCEPT_FUNCTION(dispatch_once);
+  INTERCEPT_FUNCTION(dispatch_once_f);
+  INTERCEPT_FUNCTION(dispatch_semaphore_signal);
+  INTERCEPT_FUNCTION(dispatch_semaphore_wait);
+  INTERCEPT_FUNCTION(dispatch_group_wait);
+  INTERCEPT_FUNCTION(dispatch_group_leave);
+  INTERCEPT_FUNCTION(dispatch_group_async);
+  INTERCEPT_FUNCTION(dispatch_group_async_f);
+  INTERCEPT_FUNCTION(dispatch_group_notify);
+  INTERCEPT_FUNCTION(dispatch_group_notify_f);
+  INTERCEPT_FUNCTION(dispatch_source_set_event_handler);
+  INTERCEPT_FUNCTION(dispatch_source_set_event_handler_f);
+  INTERCEPT_FUNCTION(dispatch_source_set_cancel_handler);
+  INTERCEPT_FUNCTION(dispatch_source_set_cancel_handler_f);
+  INTERCEPT_FUNCTION(dispatch_source_set_registration_handler);
+  INTERCEPT_FUNCTION(dispatch_source_set_registration_handler_f);
+  INTERCEPT_FUNCTION(dispatch_apply);
+  INTERCEPT_FUNCTION(dispatch_apply_f);
+  INTERCEPT_FUNCTION(dispatch_data_create);
+  INTERCEPT_FUNCTION(dispatch_read);
+  INTERCEPT_FUNCTION(dispatch_write);
+  INTERCEPT_FUNCTION(dispatch_io_read);
+  INTERCEPT_FUNCTION(dispatch_io_write);
+  INTERCEPT_FUNCTION(dispatch_io_barrier);
+  INTERCEPT_FUNCTION(dispatch_io_create);
+  INTERCEPT_FUNCTION(dispatch_io_create_with_path);
+  INTERCEPT_FUNCTION(dispatch_io_create_with_io);
+  INTERCEPT_FUNCTION(dispatch_io_close);
+  INTERCEPT_FUNCTION(dispatch_resume);
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp
new file mode 100644
index 0000000000000..ed064150d005c
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mac.cpp
@@ -0,0 +1,521 @@
+//===-- tsan_interceptors_mac.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific interceptors.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+#include "tsan_interface.h"
+#include "tsan_interface_ann.h"
+#include "sanitizer_common/sanitizer_addrhashmap.h"
+
+#include <errno.h>
+#include <libkern/OSAtomic.h>
+#include <objc/objc-sync.h>
+#include <os/lock.h>
+#include <sys/ucontext.h>
+
+#if defined(__has_include) && __has_include(<xpc/xpc.h>)
+#include <xpc/xpc.h>
+#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
+
+typedef long long_t;
+
+extern "C" {
+int getcontext(ucontext_t *ucp) __attribute__((returns_twice));
+int setcontext(const ucontext_t *ucp);
+}
+
+namespace __tsan {
+
+// The non-barrier versions of OSAtomic* functions are semantically mo_relaxed,
+// but the two variants (e.g. OSAtomicAdd32 and OSAtomicAdd32Barrier) are
+// actually aliases of each other, and we cannot have 
diff erent interceptors for
+// them, because they're actually the same function.  Thus, we have to stay
+// conservative and treat the non-barrier versions as mo_acq_rel.
+static constexpr morder kMacOrderBarrier = mo_acq_rel;
+static constexpr morder kMacOrderNonBarrier = mo_acq_rel;
+static constexpr morder kMacFailureOrder = mo_relaxed;
+
+#define OSATOMIC_INTERCEPTOR(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
+  TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                 \
+    SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                 \
+    return tsan_atomic_f((volatile tsan_t *)ptr, x, mo);                \
+  }
+
+#define OSATOMIC_INTERCEPTOR_PLUS_X(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
+  TSAN_INTERCEPTOR(return_t, f, t x, volatile t *ptr) {                        \
+    SCOPED_TSAN_INTERCEPTOR(f, x, ptr);                                        \
+    return tsan_atomic_f((volatile tsan_t *)ptr, x, mo) + x;                   \
+  }
+
+#define OSATOMIC_INTERCEPTOR_PLUS_1(return_t, t, tsan_t, f, tsan_atomic_f, mo) \
+  TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                             \
+    SCOPED_TSAN_INTERCEPTOR(f, ptr);                                           \
+    return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) + 1;                   \
+  }
+
+#define OSATOMIC_INTERCEPTOR_MINUS_1(return_t, t, tsan_t, f, tsan_atomic_f, \
+                                     mo)                                    \
+  TSAN_INTERCEPTOR(return_t, f, volatile t *ptr) {                          \
+    SCOPED_TSAN_INTERCEPTOR(f, ptr);                                        \
+    return tsan_atomic_f((volatile tsan_t *)ptr, 1, mo) - 1;                \
+  }
+
+#define OSATOMIC_INTERCEPTORS_ARITHMETIC(f, tsan_atomic_f, m)                  \
+  m(int32_t, int32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,             \
+    kMacOrderNonBarrier)                                                       \
+  m(int32_t, int32_t, a32, f##32##Barrier, __tsan_atomic32_##tsan_atomic_f,    \
+    kMacOrderBarrier)                                                          \
+  m(int64_t, int64_t, a64, f##64, __tsan_atomic64_##tsan_atomic_f,             \
+    kMacOrderNonBarrier)                                                       \
+  m(int64_t, int64_t, a64, f##64##Barrier, __tsan_atomic64_##tsan_atomic_f,    \
+    kMacOrderBarrier)
+
+#define OSATOMIC_INTERCEPTORS_BITWISE(f, tsan_atomic_f, m, m_orig)             \
+  m(int32_t, uint32_t, a32, f##32, __tsan_atomic32_##tsan_atomic_f,            \
+    kMacOrderNonBarrier)                                                       \
+  m(int32_t, uint32_t, a32, f##32##Barrier, __tsan_atomic32_##tsan_atomic_f,   \
+    kMacOrderBarrier)                                                          \
+  m_orig(int32_t, uint32_t, a32, f##32##Orig, __tsan_atomic32_##tsan_atomic_f, \
+    kMacOrderNonBarrier)                                                       \
+  m_orig(int32_t, uint32_t, a32, f##32##OrigBarrier,                           \
+    __tsan_atomic32_##tsan_atomic_f, kMacOrderBarrier)
+
+OSATOMIC_INTERCEPTORS_ARITHMETIC(OSAtomicAdd, fetch_add,
+                                 OSATOMIC_INTERCEPTOR_PLUS_X)
+OSATOMIC_INTERCEPTORS_ARITHMETIC(OSAtomicIncrement, fetch_add,
+                                 OSATOMIC_INTERCEPTOR_PLUS_1)
+OSATOMIC_INTERCEPTORS_ARITHMETIC(OSAtomicDecrement, fetch_sub,
+                                 OSATOMIC_INTERCEPTOR_MINUS_1)
+OSATOMIC_INTERCEPTORS_BITWISE(OSAtomicOr, fetch_or, OSATOMIC_INTERCEPTOR_PLUS_X,
+                              OSATOMIC_INTERCEPTOR)
+OSATOMIC_INTERCEPTORS_BITWISE(OSAtomicAnd, fetch_and,
+                              OSATOMIC_INTERCEPTOR_PLUS_X, OSATOMIC_INTERCEPTOR)
+OSATOMIC_INTERCEPTORS_BITWISE(OSAtomicXor, fetch_xor,
+                              OSATOMIC_INTERCEPTOR_PLUS_X, OSATOMIC_INTERCEPTOR)
+
+#define OSATOMIC_INTERCEPTORS_CAS(f, tsan_atomic_f, tsan_t, t)              \
+  TSAN_INTERCEPTOR(bool, f, t old_value, t new_value, t volatile *ptr) {    \
+    SCOPED_TSAN_INTERCEPTOR(f, old_value, new_value, ptr);                  \
+    return tsan_atomic_f##_compare_exchange_strong(                         \
+        (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value,    \
+        kMacOrderNonBarrier, kMacFailureOrder);                             \
+  }                                                                         \
+                                                                            \
+  TSAN_INTERCEPTOR(bool, f##Barrier, t old_value, t new_value,              \
+                   t volatile *ptr) {                                       \
+    SCOPED_TSAN_INTERCEPTOR(f##Barrier, old_value, new_value, ptr);         \
+    return tsan_atomic_f##_compare_exchange_strong(                         \
+        (volatile tsan_t *)ptr, (tsan_t *)&old_value, (tsan_t)new_value,    \
+        kMacOrderBarrier, kMacFailureOrder);                                \
+  }
+
+OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwapInt, __tsan_atomic32, a32, int)
+OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwapLong, __tsan_atomic64, a64,
+                          long_t)
+OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwapPtr, __tsan_atomic64, a64,
+                          void *)
+OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwap32, __tsan_atomic32, a32,
+                          int32_t)
+OSATOMIC_INTERCEPTORS_CAS(OSAtomicCompareAndSwap64, __tsan_atomic64, a64,
+                          int64_t)
+
+#define OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, mo)             \
+  TSAN_INTERCEPTOR(bool, f, uint32_t n, volatile void *ptr) {    \
+    SCOPED_TSAN_INTERCEPTOR(f, n, ptr);                          \
+    volatile char *byte_ptr = ((volatile char *)ptr) + (n >> 3); \
+    char bit = 0x80u >> (n & 7);                                 \
+    char mask = clear ? ~bit : bit;                              \
+    char orig_byte = op((volatile a8 *)byte_ptr, mask, mo);      \
+    return orig_byte & bit;                                      \
+  }
+
+#define OSATOMIC_INTERCEPTORS_BITOP(f, op, clear)               \
+  OSATOMIC_INTERCEPTOR_BITOP(f, op, clear, kMacOrderNonBarrier) \
+  OSATOMIC_INTERCEPTOR_BITOP(f##Barrier, op, clear, kMacOrderBarrier)
+
+OSATOMIC_INTERCEPTORS_BITOP(OSAtomicTestAndSet, __tsan_atomic8_fetch_or, false)
+OSATOMIC_INTERCEPTORS_BITOP(OSAtomicTestAndClear, __tsan_atomic8_fetch_and,
+                            true)
+
+TSAN_INTERCEPTOR(void, OSAtomicEnqueue, OSQueueHead *list, void *item,
+                 size_t offset) {
+  SCOPED_TSAN_INTERCEPTOR(OSAtomicEnqueue, list, item, offset);
+  __tsan_release(item);
+  REAL(OSAtomicEnqueue)(list, item, offset);
+}
+
+TSAN_INTERCEPTOR(void *, OSAtomicDequeue, OSQueueHead *list, size_t offset) {
+  SCOPED_TSAN_INTERCEPTOR(OSAtomicDequeue, list, offset);
+  void *item = REAL(OSAtomicDequeue)(list, offset);
+  if (item) __tsan_acquire(item);
+  return item;
+}
+
+// OSAtomicFifoEnqueue and OSAtomicFifoDequeue are only on OS X.
+#if !SANITIZER_IOS
+
+TSAN_INTERCEPTOR(void, OSAtomicFifoEnqueue, OSFifoQueueHead *list, void *item,
+                 size_t offset) {
+  SCOPED_TSAN_INTERCEPTOR(OSAtomicFifoEnqueue, list, item, offset);
+  __tsan_release(item);
+  REAL(OSAtomicFifoEnqueue)(list, item, offset);
+}
+
+TSAN_INTERCEPTOR(void *, OSAtomicFifoDequeue, OSFifoQueueHead *list,
+                 size_t offset) {
+  SCOPED_TSAN_INTERCEPTOR(OSAtomicFifoDequeue, list, offset);
+  void *item = REAL(OSAtomicFifoDequeue)(list, offset);
+  if (item) __tsan_acquire(item);
+  return item;
+}
+
+#endif
+
+TSAN_INTERCEPTOR(void, OSSpinLockLock, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockLock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockLock, lock);
+  REAL(OSSpinLockLock)(lock);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(bool, OSSpinLockTry, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockTry)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockTry, lock);
+  bool result = REAL(OSSpinLockTry)(lock);
+  if (result)
+    Acquire(thr, pc, (uptr)lock);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, OSSpinLockUnlock, volatile OSSpinLock *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(OSSpinLockUnlock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(OSSpinLockUnlock, lock);
+  Release(thr, pc, (uptr)lock);
+  REAL(OSSpinLockUnlock)(lock);
+}
+
+TSAN_INTERCEPTOR(void, os_lock_lock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_lock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_lock, lock);
+  REAL(os_lock_lock)(lock);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(bool, os_lock_trylock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_trylock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_trylock, lock);
+  bool result = REAL(os_lock_trylock)(lock);
+  if (result)
+    Acquire(thr, pc, (uptr)lock);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, os_lock_unlock, void *lock) {
+  CHECK(!cur_thread()->is_dead);
+  if (!cur_thread()->is_inited) {
+    return REAL(os_lock_unlock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_lock_unlock, lock);
+  Release(thr, pc, (uptr)lock);
+  REAL(os_lock_unlock)(lock);
+}
+
+TSAN_INTERCEPTOR(void, os_unfair_lock_lock, os_unfair_lock_t lock) {
+  if (!cur_thread()->is_inited || cur_thread()->is_dead) {
+    return REAL(os_unfair_lock_lock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_unfair_lock_lock, lock);
+  REAL(os_unfair_lock_lock)(lock);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(void, os_unfair_lock_lock_with_options, os_unfair_lock_t lock,
+                 u32 options) {
+  if (!cur_thread()->is_inited || cur_thread()->is_dead) {
+    return REAL(os_unfair_lock_lock_with_options)(lock, options);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_unfair_lock_lock_with_options, lock, options);
+  REAL(os_unfair_lock_lock_with_options)(lock, options);
+  Acquire(thr, pc, (uptr)lock);
+}
+
+TSAN_INTERCEPTOR(bool, os_unfair_lock_trylock, os_unfair_lock_t lock) {
+  if (!cur_thread()->is_inited || cur_thread()->is_dead) {
+    return REAL(os_unfair_lock_trylock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_unfair_lock_trylock, lock);
+  bool result = REAL(os_unfair_lock_trylock)(lock);
+  if (result)
+    Acquire(thr, pc, (uptr)lock);
+  return result;
+}
+
+TSAN_INTERCEPTOR(void, os_unfair_lock_unlock, os_unfair_lock_t lock) {
+  if (!cur_thread()->is_inited || cur_thread()->is_dead) {
+    return REAL(os_unfair_lock_unlock)(lock);
+  }
+  SCOPED_TSAN_INTERCEPTOR(os_unfair_lock_unlock, lock);
+  Release(thr, pc, (uptr)lock);
+  REAL(os_unfair_lock_unlock)(lock);
+}
+
+#if defined(__has_include) && __has_include(<xpc/xpc.h>)
+
+TSAN_INTERCEPTOR(void, xpc_connection_set_event_handler,
+                 xpc_connection_t connection, xpc_handler_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(xpc_connection_set_event_handler, connection,
+                          handler);
+  Release(thr, pc, (uptr)connection);
+  xpc_handler_t new_handler = ^(xpc_object_t object) {
+    {
+      SCOPED_INTERCEPTOR_RAW(xpc_connection_set_event_handler);
+      Acquire(thr, pc, (uptr)connection);
+    }
+    handler(object);
+  };
+  REAL(xpc_connection_set_event_handler)(connection, new_handler);
+}
+
+TSAN_INTERCEPTOR(void, xpc_connection_send_barrier, xpc_connection_t connection,
+                 dispatch_block_t barrier) {
+  SCOPED_TSAN_INTERCEPTOR(xpc_connection_send_barrier, connection, barrier);
+  Release(thr, pc, (uptr)connection);
+  dispatch_block_t new_barrier = ^() {
+    {
+      SCOPED_INTERCEPTOR_RAW(xpc_connection_send_barrier);
+      Acquire(thr, pc, (uptr)connection);
+    }
+    barrier();
+  };
+  REAL(xpc_connection_send_barrier)(connection, new_barrier);
+}
+
+TSAN_INTERCEPTOR(void, xpc_connection_send_message_with_reply,
+                 xpc_connection_t connection, xpc_object_t message,
+                 dispatch_queue_t replyq, xpc_handler_t handler) {
+  SCOPED_TSAN_INTERCEPTOR(xpc_connection_send_message_with_reply, connection,
+                          message, replyq, handler);
+  Release(thr, pc, (uptr)connection);
+  xpc_handler_t new_handler = ^(xpc_object_t object) {
+    {
+      SCOPED_INTERCEPTOR_RAW(xpc_connection_send_message_with_reply);
+      Acquire(thr, pc, (uptr)connection);
+    }
+    handler(object);
+  };
+  REAL(xpc_connection_send_message_with_reply)
+  (connection, message, replyq, new_handler);
+}
+
+TSAN_INTERCEPTOR(void, xpc_connection_cancel, xpc_connection_t connection) {
+  SCOPED_TSAN_INTERCEPTOR(xpc_connection_cancel, connection);
+  Release(thr, pc, (uptr)connection);
+  REAL(xpc_connection_cancel)(connection);
+}
+
+#endif  // #if defined(__has_include) && __has_include(<xpc/xpc.h>)
+
+// Determines whether the Obj-C object pointer is a tagged pointer. Tagged
+// pointers encode the object data directly in their pointer bits and do not
+// have an associated memory allocation. The Obj-C runtime uses tagged pointers
+// to transparently optimize small objects.
+static bool IsTaggedObjCPointer(id obj) {
+  const uptr kPossibleTaggedBits = 0x8000000000000001ull;
+  return ((uptr)obj & kPossibleTaggedBits) != 0;
+}
+
+// Returns an address which can be used to inform TSan about synchronization
+// points (MutexLock/Unlock). The TSan infrastructure expects this to be a valid
+// address in the process space. We do a small allocation here to obtain a
+// stable address (the array backing the hash map can change). The memory is
+// never free'd (leaked) and allocation and locking are slow, but this code only
+// runs for @synchronized with tagged pointers, which is very rare.
+static uptr GetOrCreateSyncAddress(uptr addr, ThreadState *thr, uptr pc) {
+  typedef AddrHashMap<uptr, 5> Map;
+  static Map Addresses;
+  Map::Handle h(&Addresses, addr);
+  if (h.created()) {
+    ThreadIgnoreBegin(thr, pc);
+    *h = (uptr) user_alloc(thr, pc, /*size=*/1);
+    ThreadIgnoreEnd(thr);
+  }
+  return *h;
+}
+
+// Returns an address on which we can synchronize given an Obj-C object pointer.
+// For normal object pointers, this is just the address of the object in memory.
+// Tagged pointers are not backed by an actual memory allocation, so we need to
+// synthesize a valid address.
+static uptr SyncAddressForObjCObject(id obj, ThreadState *thr, uptr pc) {
+  if (IsTaggedObjCPointer(obj))
+    return GetOrCreateSyncAddress((uptr)obj, thr, pc);
+  return (uptr)obj;
+}
+
+TSAN_INTERCEPTOR(int, objc_sync_enter, id obj) {
+  SCOPED_TSAN_INTERCEPTOR(objc_sync_enter, obj);
+  if (!obj) return REAL(objc_sync_enter)(obj);
+  uptr addr = SyncAddressForObjCObject(obj, thr, pc);
+  MutexPreLock(thr, pc, addr, MutexFlagWriteReentrant);
+  int result = REAL(objc_sync_enter)(obj);
+  CHECK_EQ(result, OBJC_SYNC_SUCCESS);
+  MutexPostLock(thr, pc, addr, MutexFlagWriteReentrant);
+  return result;
+}
+
+TSAN_INTERCEPTOR(int, objc_sync_exit, id obj) {
+  SCOPED_TSAN_INTERCEPTOR(objc_sync_exit, obj);
+  if (!obj) return REAL(objc_sync_exit)(obj);
+  uptr addr = SyncAddressForObjCObject(obj, thr, pc);
+  MutexUnlock(thr, pc, addr);
+  int result = REAL(objc_sync_exit)(obj);
+  if (result != OBJC_SYNC_SUCCESS) MutexInvalidAccess(thr, pc, addr);
+  return result;
+}
+
+TSAN_INTERCEPTOR(int, swapcontext, ucontext_t *oucp, const ucontext_t *ucp) {
+  {
+    SCOPED_INTERCEPTOR_RAW(swapcontext, oucp, ucp);
+  }
+  // Because of swapcontext() semantics we have no option but to copy its
+  // implementation here
+  if (!oucp || !ucp) {
+    errno = EINVAL;
+    return -1;
+  }
+  ThreadState *thr = cur_thread();
+  const int UCF_SWAPPED = 0x80000000;
+  oucp->uc_onstack &= ~UCF_SWAPPED;
+  thr->ignore_interceptors++;
+  int ret = getcontext(oucp);
+  if (!(oucp->uc_onstack & UCF_SWAPPED)) {
+    thr->ignore_interceptors--;
+    if (!ret) {
+      oucp->uc_onstack |= UCF_SWAPPED;
+      ret = setcontext(ucp);
+    }
+  }
+  return ret;
+}
+
+// On macOS, libc++ is always linked dynamically, so intercepting works the
+// usual way.
+#define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
+
+namespace {
+struct fake_shared_weak_count {
+  volatile a64 shared_owners;
+  volatile a64 shared_weak_owners;
+  virtual void _unused_0x0() = 0;
+  virtual void _unused_0x8() = 0;
+  virtual void on_zero_shared() = 0;
+  virtual void _unused_0x18() = 0;
+  virtual void on_zero_shared_weak() = 0;
+  virtual ~fake_shared_weak_count() = 0;  // suppress -Wnon-virtual-dtor
+};
+}  // namespace
+
+// The following code adds libc++ interceptors for:
+//     void __shared_weak_count::__release_shared() _NOEXCEPT;
+//     bool __shared_count::__release_shared() _NOEXCEPT;
+// Shared and weak pointers in C++ maintain reference counts via atomics in
+// libc++.dylib, which are TSan-invisible, and this leads to false positives in
+// destructor code. These interceptors re-implements the whole functions so that
+// the mo_acq_rel semantics of the atomic decrement are visible.
+//
+// Unfortunately, the interceptors cannot simply Acquire/Release some sync
+// object and call the original function, because it would have a race between
+// the sync and the destruction of the object.  Calling both under a lock will
+// not work because the destructor can invoke this interceptor again (and even
+// in a 
diff erent thread, so recursive locks don't help).
+
+STDCXX_INTERCEPTOR(void, _ZNSt3__119__shared_weak_count16__release_sharedEv,
+                   fake_shared_weak_count *o) {
+  if (!flags()->shared_ptr_interceptor)
+    return REAL(_ZNSt3__119__shared_weak_count16__release_sharedEv)(o);
+
+  SCOPED_TSAN_INTERCEPTOR(_ZNSt3__119__shared_weak_count16__release_sharedEv,
+                          o);
+  if (__tsan_atomic64_fetch_add(&o->shared_owners, -1, mo_release) == 0) {
+    Acquire(thr, pc, (uptr)&o->shared_owners);
+    o->on_zero_shared();
+    if (__tsan_atomic64_fetch_add(&o->shared_weak_owners, -1, mo_release) ==
+        0) {
+      Acquire(thr, pc, (uptr)&o->shared_weak_owners);
+      o->on_zero_shared_weak();
+    }
+  }
+}
+
+STDCXX_INTERCEPTOR(bool, _ZNSt3__114__shared_count16__release_sharedEv,
+                   fake_shared_weak_count *o) {
+  if (!flags()->shared_ptr_interceptor)
+    return REAL(_ZNSt3__114__shared_count16__release_sharedEv)(o);
+
+  SCOPED_TSAN_INTERCEPTOR(_ZNSt3__114__shared_count16__release_sharedEv, o);
+  if (__tsan_atomic64_fetch_add(&o->shared_owners, -1, mo_release) == 0) {
+    Acquire(thr, pc, (uptr)&o->shared_owners);
+    o->on_zero_shared();
+    return true;
+  }
+  return false;
+}
+
+namespace {
+struct call_once_callback_args {
+  void (*orig_func)(void *arg);
+  void *orig_arg;
+  void *flag;
+};
+
+void call_once_callback_wrapper(void *arg) {
+  call_once_callback_args *new_args = (call_once_callback_args *)arg;
+  new_args->orig_func(new_args->orig_arg);
+  __tsan_release(new_args->flag);
+}
+}  // namespace
+
+// This adds a libc++ interceptor for:
+//     void __call_once(volatile unsigned long&, void*, void(*)(void*));
+// C++11 call_once is implemented via an internal function __call_once which is
+// inside libc++.dylib, and the atomic release store inside it is thus
+// TSan-invisible. To avoid false positives, this interceptor wraps the callback
+// function and performs an explicit Release after the user code has run.
+STDCXX_INTERCEPTOR(void, _ZNSt3__111__call_onceERVmPvPFvS2_E, void *flag,
+                   void *arg, void (*func)(void *arg)) {
+  call_once_callback_args new_args = {func, arg, flag};
+  REAL(_ZNSt3__111__call_onceERVmPvPFvS2_E)(flag, &new_args,
+                                            call_once_callback_wrapper);
+}
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_MAC

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mach_vm.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mach_vm.cpp
new file mode 100644
index 0000000000000..6d62ff6a83825
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_mach_vm.cpp
@@ -0,0 +1,53 @@
+//===-- tsan_interceptors_mach_vm.cpp -------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interceptors for mach_vm_* user space memory routines on Darwin.
+//===----------------------------------------------------------------------===//
+
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+#include "tsan_platform.h"
+
+#include <mach/mach.h>
+
+namespace __tsan {
+
+static bool intersects_with_shadow(mach_vm_address_t address,
+                                   mach_vm_size_t size, int flags) {
+  // VM_FLAGS_FIXED is 0x0, so we have to test for VM_FLAGS_ANYWHERE.
+  if (flags & VM_FLAGS_ANYWHERE) return false;
+  return !IsAppMem(address) || !IsAppMem(address + size - 1);
+}
+
+TSAN_INTERCEPTOR(kern_return_t, mach_vm_allocate, vm_map_t target,
+                 mach_vm_address_t *address, mach_vm_size_t size, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(mach_vm_allocate, target, address, size, flags);
+  if (target != mach_task_self())
+    return REAL(mach_vm_allocate)(target, address, size, flags);
+  if (address && intersects_with_shadow(*address, size, flags))
+    return KERN_NO_SPACE;
+  kern_return_t kr = REAL(mach_vm_allocate)(target, address, size, flags);
+  if (kr == KERN_SUCCESS)
+    MemoryRangeImitateWriteOrResetRange(thr, pc, *address, size);
+  return kr;
+}
+
+TSAN_INTERCEPTOR(kern_return_t, mach_vm_deallocate, vm_map_t target,
+                 mach_vm_address_t address, mach_vm_size_t size) {
+  SCOPED_TSAN_INTERCEPTOR(mach_vm_deallocate, target, address, size);
+  if (target != mach_task_self())
+    return REAL(mach_vm_deallocate)(target, address, size);
+  kern_return_t kr = REAL(mach_vm_deallocate)(target, address, size);
+  if (kr == KERN_SUCCESS && address)
+    UnmapShadow(thr, address, size);
+  return kr;
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp
new file mode 100644
index 0000000000000..cf3dc90d96a12
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interceptors_posix.cpp
@@ -0,0 +1,3015 @@
+//===-- tsan_interceptors_posix.cpp ---------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// FIXME: move as many interceptors as possible into
+// sanitizer_common/sanitizer_common_interceptors.inc
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_errno.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
+#include "sanitizer_common/sanitizer_platform_limits_posix.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_posix.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_tls_get_addr.h"
+#include "interception/interception.h"
+#include "tsan_interceptors.h"
+#include "tsan_interface.h"
+#include "tsan_platform.h"
+#include "tsan_suppressions.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_fd.h"
+
+#include <stdarg.h>
+
+using namespace __tsan;
+
+#if SANITIZER_FREEBSD || SANITIZER_MAC
+#define stdout __stdoutp
+#define stderr __stderrp
+#endif
+
+#if SANITIZER_NETBSD
+#define dirfd(dirp) (*(int *)(dirp))
+#define fileno_unlocked(fp)              \
+  (((__sanitizer_FILE *)fp)->_file == -1 \
+       ? -1                              \
+       : (int)(unsigned short)(((__sanitizer_FILE *)fp)->_file))
+
+#define stdout ((__sanitizer_FILE*)&__sF[1])
+#define stderr ((__sanitizer_FILE*)&__sF[2])
+
+#define nanosleep __nanosleep50
+#define vfork __vfork14
+#endif
+
+#ifdef __mips__
+const int kSigCount = 129;
+#else
+const int kSigCount = 65;
+#endif
+
+#ifdef __mips__
+struct ucontext_t {
+  u64 opaque[768 / sizeof(u64) + 1];
+};
+#else
+struct ucontext_t {
+  // The size is determined by looking at sizeof of real ucontext_t on linux.
+  u64 opaque[936 / sizeof(u64) + 1];
+};
+#endif
+
+#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 || \
+    defined(__s390x__)
+#define PTHREAD_ABI_BASE  "GLIBC_2.3.2"
+#elif defined(__aarch64__) || SANITIZER_PPC64V2
+#define PTHREAD_ABI_BASE  "GLIBC_2.17"
+#endif
+
+extern "C" int pthread_attr_init(void *attr);
+extern "C" int pthread_attr_destroy(void *attr);
+DECLARE_REAL(int, pthread_attr_getdetachstate, void *, void *)
+extern "C" int pthread_attr_setstacksize(void *attr, uptr stacksize);
+extern "C" int pthread_atfork(void (*prepare)(void), void (*parent)(void),
+                              void (*child)(void));
+extern "C" int pthread_key_create(unsigned *key, void (*destructor)(void* v));
+extern "C" int pthread_setspecific(unsigned key, const void *v);
+DECLARE_REAL(int, pthread_mutexattr_gettype, void *, void *)
+DECLARE_REAL(int, fflush, __sanitizer_FILE *fp)
+DECLARE_REAL_AND_INTERCEPTOR(void *, malloc, uptr size)
+DECLARE_REAL_AND_INTERCEPTOR(void, free, void *ptr)
+extern "C" int pthread_equal(void *t1, void *t2);
+extern "C" void *pthread_self();
+extern "C" void _exit(int status);
+#if !SANITIZER_NETBSD
+extern "C" int fileno_unlocked(void *stream);
+extern "C" int dirfd(void *dirp);
+#endif
+#if SANITIZER_NETBSD
+extern __sanitizer_FILE __sF[];
+#else
+extern __sanitizer_FILE *stdout, *stderr;
+#endif
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
+const int PTHREAD_MUTEX_RECURSIVE = 1;
+const int PTHREAD_MUTEX_RECURSIVE_NP = 1;
+#else
+const int PTHREAD_MUTEX_RECURSIVE = 2;
+const int PTHREAD_MUTEX_RECURSIVE_NP = 2;
+#endif
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
+const int EPOLL_CTL_ADD = 1;
+#endif
+const int SIGILL = 4;
+const int SIGTRAP = 5;
+const int SIGABRT = 6;
+const int SIGFPE = 8;
+const int SIGSEGV = 11;
+const int SIGPIPE = 13;
+const int SIGTERM = 15;
+#if defined(__mips__) || SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD
+const int SIGBUS = 10;
+const int SIGSYS = 12;
+#else
+const int SIGBUS = 7;
+const int SIGSYS = 31;
+#endif
+void *const MAP_FAILED = (void*)-1;
+#if SANITIZER_NETBSD
+const int PTHREAD_BARRIER_SERIAL_THREAD = 1234567;
+#elif !SANITIZER_MAC
+const int PTHREAD_BARRIER_SERIAL_THREAD = -1;
+#endif
+const int MAP_FIXED = 0x10;
+typedef long long_t;
+typedef __sanitizer::u16 mode_t;
+
+// From /usr/include/unistd.h
+# define F_ULOCK 0      /* Unlock a previously locked region.  */
+# define F_LOCK  1      /* Lock a region for exclusive use.  */
+# define F_TLOCK 2      /* Test and lock a region for exclusive use.  */
+# define F_TEST  3      /* Test a region for other processes locks.  */
+
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD
+const int SA_SIGINFO = 0x40;
+const int SIG_SETMASK = 3;
+#elif defined(__mips__)
+const int SA_SIGINFO = 8;
+const int SIG_SETMASK = 3;
+#else
+const int SA_SIGINFO = 4;
+const int SIG_SETMASK = 2;
+#endif
+
+#define COMMON_INTERCEPTOR_NOTHING_IS_INITIALIZED \
+  (!cur_thread_init()->is_inited)
+
+namespace __tsan {
+struct SignalDesc {
+  bool armed;
+  __sanitizer_siginfo siginfo;
+  ucontext_t ctx;
+};
+
+struct ThreadSignalContext {
+  int int_signal_send;
+  atomic_uintptr_t in_blocking_func;
+  SignalDesc pending_signals[kSigCount];
+  // emptyset and oldset are too big for stack.
+  __sanitizer_sigset_t emptyset;
+  __sanitizer_sigset_t oldset;
+};
+
+// The sole reason tsan wraps atexit callbacks is to establish synchronization
+// between callback setup and callback execution.
+struct AtExitCtx {
+  void (*f)();
+  void *arg;
+  uptr pc;
+};
+
+// InterceptorContext holds all global data required for interceptors.
+// It's explicitly constructed in InitializeInterceptors with placement new
+// and is never destroyed. This allows usage of members with non-trivial
+// constructors and destructors.
+struct InterceptorContext {
+  // The object is 64-byte aligned, because we want hot data to be located
+  // in a single cache line if possible (it's accessed in every interceptor).
+  ALIGNED(64) LibIgnore libignore;
+  __sanitizer_sigaction sigactions[kSigCount];
+#if !SANITIZER_MAC && !SANITIZER_NETBSD
+  unsigned finalize_key;
+#endif
+
+  Mutex atexit_mu;
+  Vector<struct AtExitCtx *> AtExitStack;
+
+  InterceptorContext() : libignore(LINKER_INITIALIZED), atexit_mu(MutexTypeAtExit), AtExitStack() {}
+};
+
+static ALIGNED(64) char interceptor_placeholder[sizeof(InterceptorContext)];
+InterceptorContext *interceptor_ctx() {
+  return reinterpret_cast<InterceptorContext*>(&interceptor_placeholder[0]);
+}
+
+LibIgnore *libignore() {
+  return &interceptor_ctx()->libignore;
+}
+
+void InitializeLibIgnore() {
+  const SuppressionContext &supp = *Suppressions();
+  const uptr n = supp.SuppressionCount();
+  for (uptr i = 0; i < n; i++) {
+    const Suppression *s = supp.SuppressionAt(i);
+    if (0 == internal_strcmp(s->type, kSuppressionLib))
+      libignore()->AddIgnoredLibrary(s->templ);
+  }
+  if (flags()->ignore_noninstrumented_modules)
+    libignore()->IgnoreNoninstrumentedModules(true);
+  libignore()->OnLibraryLoaded(0);
+}
+
+// The following two hooks can be used by for cooperative scheduling when
+// locking.
+#ifdef TSAN_EXTERNAL_HOOKS
+void OnPotentiallyBlockingRegionBegin();
+void OnPotentiallyBlockingRegionEnd();
+#else
+SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnPotentiallyBlockingRegionBegin() {}
+SANITIZER_WEAK_CXX_DEFAULT_IMPL void OnPotentiallyBlockingRegionEnd() {}
+#endif
+
+}  // namespace __tsan
+
+static ThreadSignalContext *SigCtx(ThreadState *thr) {
+  ThreadSignalContext *ctx = (ThreadSignalContext*)thr->signal_ctx;
+  if (ctx == 0 && !thr->is_dead) {
+    ctx = (ThreadSignalContext*)MmapOrDie(sizeof(*ctx), "ThreadSignalContext");
+    MemoryResetRange(thr, (uptr)&SigCtx, (uptr)ctx, sizeof(*ctx));
+    thr->signal_ctx = ctx;
+  }
+  return ctx;
+}
+
+ScopedInterceptor::ScopedInterceptor(ThreadState *thr, const char *fname,
+                                     uptr pc)
+    : thr_(thr), in_ignored_lib_(false), ignoring_(false) {
+  LazyInitialize(thr);
+  if (!thr_->is_inited) return;
+  if (!thr_->ignore_interceptors) FuncEntry(thr, pc);
+  DPrintf("#%d: intercept %s()\n", thr_->tid, fname);
+  ignoring_ =
+      !thr_->in_ignored_lib && (flags()->ignore_interceptors_accesses ||
+                                libignore()->IsIgnored(pc, &in_ignored_lib_));
+  EnableIgnores();
+}
+
+ScopedInterceptor::~ScopedInterceptor() {
+  if (!thr_->is_inited) return;
+  DisableIgnores();
+  if (!thr_->ignore_interceptors) {
+    ProcessPendingSignals(thr_);
+    FuncExit(thr_);
+    CheckedMutex::CheckNoLocks();
+  }
+}
+
+NOINLINE
+void ScopedInterceptor::EnableIgnoresImpl() {
+  ThreadIgnoreBegin(thr_, 0);
+  if (flags()->ignore_noninstrumented_modules)
+    thr_->suppress_reports++;
+  if (in_ignored_lib_) {
+    DCHECK(!thr_->in_ignored_lib);
+    thr_->in_ignored_lib = true;
+  }
+}
+
+NOINLINE
+void ScopedInterceptor::DisableIgnoresImpl() {
+  ThreadIgnoreEnd(thr_);
+  if (flags()->ignore_noninstrumented_modules)
+    thr_->suppress_reports--;
+  if (in_ignored_lib_) {
+    DCHECK(thr_->in_ignored_lib);
+    thr_->in_ignored_lib = false;
+  }
+}
+
+#define TSAN_INTERCEPT(func) INTERCEPT_FUNCTION(func)
+#if SANITIZER_FREEBSD
+# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func)
+#elif SANITIZER_NETBSD
+# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION(func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func) \
+         INTERCEPT_FUNCTION(__libc_##func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func) \
+         INTERCEPT_FUNCTION(__libc_thr_##func)
+#else
+# define TSAN_INTERCEPT_VER(func, ver) INTERCEPT_FUNCTION_VER(func, ver)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(func)
+# define TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(func)
+#endif
+
+#define READ_STRING_OF_LEN(thr, pc, s, len, n)                 \
+  MemoryAccessRange((thr), (pc), (uptr)(s),                         \
+    common_flags()->strict_string_checks ? (len) + 1 : (n), false)
+
+#define READ_STRING(thr, pc, s, n)                             \
+    READ_STRING_OF_LEN((thr), (pc), (s), internal_strlen(s), (n))
+
+#define BLOCK_REAL(name) (BlockingCall(thr), REAL(name))
+
+struct BlockingCall {
+  explicit BlockingCall(ThreadState *thr)
+      : thr(thr)
+      , ctx(SigCtx(thr)) {
+    for (;;) {
+      atomic_store(&ctx->in_blocking_func, 1, memory_order_relaxed);
+      if (atomic_load(&thr->pending_signals, memory_order_relaxed) == 0)
+        break;
+      atomic_store(&ctx->in_blocking_func, 0, memory_order_relaxed);
+      ProcessPendingSignals(thr);
+    }
+    // When we are in a "blocking call", we process signals asynchronously
+    // (right when they arrive). In this context we do not expect to be
+    // executing any user/runtime code. The known interceptor sequence when
+    // this is not true is: pthread_join -> munmap(stack). It's fine
+    // to ignore munmap in this case -- we handle stack shadow separately.
+    thr->ignore_interceptors++;
+  }
+
+  ~BlockingCall() {
+    thr->ignore_interceptors--;
+    atomic_store(&ctx->in_blocking_func, 0, memory_order_relaxed);
+  }
+
+  ThreadState *thr;
+  ThreadSignalContext *ctx;
+};
+
+TSAN_INTERCEPTOR(unsigned, sleep, unsigned sec) {
+  SCOPED_TSAN_INTERCEPTOR(sleep, sec);
+  unsigned res = BLOCK_REAL(sleep)(sec);
+  AfterSleep(thr, pc);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, usleep, long_t usec) {
+  SCOPED_TSAN_INTERCEPTOR(usleep, usec);
+  int res = BLOCK_REAL(usleep)(usec);
+  AfterSleep(thr, pc);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, nanosleep, void *req, void *rem) {
+  SCOPED_TSAN_INTERCEPTOR(nanosleep, req, rem);
+  int res = BLOCK_REAL(nanosleep)(req, rem);
+  AfterSleep(thr, pc);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pause, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(pause, fake);
+  return BLOCK_REAL(pause)(fake);
+}
+
+// Note: we specifically call the function in such strange way
+// with "installed_at" because in reports it will appear between
+// callback frames and the frame that installed the callback.
+static void at_exit_callback_installed_at() {
+  AtExitCtx *ctx;
+  {
+    // Ensure thread-safety.
+    Lock l(&interceptor_ctx()->atexit_mu);
+
+    // Pop AtExitCtx from the top of the stack of callback functions
+    uptr element = interceptor_ctx()->AtExitStack.Size() - 1;
+    ctx = interceptor_ctx()->AtExitStack[element];
+    interceptor_ctx()->AtExitStack.PopBack();
+  }
+
+  ThreadState *thr = cur_thread();
+  Acquire(thr, ctx->pc, (uptr)ctx);
+  FuncEntry(thr, ctx->pc);
+  ((void(*)())ctx->f)();
+  FuncExit(thr);
+  Free(ctx);
+}
+
+static void cxa_at_exit_callback_installed_at(void *arg) {
+  ThreadState *thr = cur_thread();
+  AtExitCtx *ctx = (AtExitCtx*)arg;
+  Acquire(thr, ctx->pc, (uptr)arg);
+  FuncEntry(thr, ctx->pc);
+  ((void(*)(void *arg))ctx->f)(ctx->arg);
+  FuncExit(thr);
+  Free(ctx);
+}
+
+static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
+      void *arg, void *dso);
+
+#if !SANITIZER_ANDROID
+TSAN_INTERCEPTOR(int, atexit, void (*f)()) {
+  if (in_symbolizer())
+    return 0;
+  // We want to setup the atexit callback even if we are in ignored lib
+  // or after fork.
+  SCOPED_INTERCEPTOR_RAW(atexit, f);
+  return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, 0, 0);
+}
+#endif
+
+TSAN_INTERCEPTOR(int, __cxa_atexit, void (*f)(void *a), void *arg, void *dso) {
+  if (in_symbolizer())
+    return 0;
+  SCOPED_TSAN_INTERCEPTOR(__cxa_atexit, f, arg, dso);
+  return setup_at_exit_wrapper(thr, GET_CALLER_PC(), (void (*)())f, arg, dso);
+}
+
+static int setup_at_exit_wrapper(ThreadState *thr, uptr pc, void(*f)(),
+      void *arg, void *dso) {
+  auto *ctx = New<AtExitCtx>();
+  ctx->f = f;
+  ctx->arg = arg;
+  ctx->pc = pc;
+  Release(thr, pc, (uptr)ctx);
+  // Memory allocation in __cxa_atexit will race with free during exit,
+  // because we do not see synchronization around atexit callback list.
+  ThreadIgnoreBegin(thr, pc);
+  int res;
+  if (!dso) {
+    // NetBSD does not preserve the 2nd argument if dso is equal to 0
+    // Store ctx in a local stack-like structure
+
+    // Ensure thread-safety.
+    Lock l(&interceptor_ctx()->atexit_mu);
+    // __cxa_atexit calls calloc. If we don't ignore interceptors, we will fail
+    // due to atexit_mu held on exit from the calloc interceptor.
+    ScopedIgnoreInterceptors ignore;
+
+    res = REAL(__cxa_atexit)((void (*)(void *a))at_exit_callback_installed_at,
+                             0, 0);
+    // Push AtExitCtx on the top of the stack of callback functions
+    if (!res) {
+      interceptor_ctx()->AtExitStack.PushBack(ctx);
+    }
+  } else {
+    res = REAL(__cxa_atexit)(cxa_at_exit_callback_installed_at, ctx, dso);
+  }
+  ThreadIgnoreEnd(thr);
+  return res;
+}
+
+#if !SANITIZER_MAC && !SANITIZER_NETBSD
+static void on_exit_callback_installed_at(int status, void *arg) {
+  ThreadState *thr = cur_thread();
+  AtExitCtx *ctx = (AtExitCtx*)arg;
+  Acquire(thr, ctx->pc, (uptr)arg);
+  FuncEntry(thr, ctx->pc);
+  ((void(*)(int status, void *arg))ctx->f)(status, ctx->arg);
+  FuncExit(thr);
+  Free(ctx);
+}
+
+TSAN_INTERCEPTOR(int, on_exit, void(*f)(int, void*), void *arg) {
+  if (in_symbolizer())
+    return 0;
+  SCOPED_TSAN_INTERCEPTOR(on_exit, f, arg);
+  auto *ctx = New<AtExitCtx>();
+  ctx->f = (void(*)())f;
+  ctx->arg = arg;
+  ctx->pc = GET_CALLER_PC();
+  Release(thr, pc, (uptr)ctx);
+  // Memory allocation in __cxa_atexit will race with free during exit,
+  // because we do not see synchronization around atexit callback list.
+  ThreadIgnoreBegin(thr, pc);
+  int res = REAL(on_exit)(on_exit_callback_installed_at, ctx);
+  ThreadIgnoreEnd(thr);
+  return res;
+}
+#define TSAN_MAYBE_INTERCEPT_ON_EXIT TSAN_INTERCEPT(on_exit)
+#else
+#define TSAN_MAYBE_INTERCEPT_ON_EXIT
+#endif
+
+// Cleanup old bufs.
+static void JmpBufGarbageCollect(ThreadState *thr, uptr sp) {
+  for (uptr i = 0; i < thr->jmp_bufs.Size(); i++) {
+    JmpBuf *buf = &thr->jmp_bufs[i];
+    if (buf->sp <= sp) {
+      uptr sz = thr->jmp_bufs.Size();
+      internal_memcpy(buf, &thr->jmp_bufs[sz - 1], sizeof(*buf));
+      thr->jmp_bufs.PopBack();
+      i--;
+    }
+  }
+}
+
+static void SetJmp(ThreadState *thr, uptr sp) {
+  if (!thr->is_inited)  // called from libc guts during bootstrap
+    return;
+  // Cleanup old bufs.
+  JmpBufGarbageCollect(thr, sp);
+  // Remember the buf.
+  JmpBuf *buf = thr->jmp_bufs.PushBack();
+  buf->sp = sp;
+  buf->shadow_stack_pos = thr->shadow_stack_pos;
+  ThreadSignalContext *sctx = SigCtx(thr);
+  buf->int_signal_send = sctx ? sctx->int_signal_send : 0;
+  buf->in_blocking_func = sctx ?
+      atomic_load(&sctx->in_blocking_func, memory_order_relaxed) :
+      false;
+  buf->in_signal_handler = atomic_load(&thr->in_signal_handler,
+      memory_order_relaxed);
+}
+
+static void LongJmp(ThreadState *thr, uptr *env) {
+  uptr sp = ExtractLongJmpSp(env);
+  // Find the saved buf with matching sp.
+  for (uptr i = 0; i < thr->jmp_bufs.Size(); i++) {
+    JmpBuf *buf = &thr->jmp_bufs[i];
+    if (buf->sp == sp) {
+      CHECK_GE(thr->shadow_stack_pos, buf->shadow_stack_pos);
+      // Unwind the stack.
+      while (thr->shadow_stack_pos > buf->shadow_stack_pos)
+        FuncExit(thr);
+      ThreadSignalContext *sctx = SigCtx(thr);
+      if (sctx) {
+        sctx->int_signal_send = buf->int_signal_send;
+        atomic_store(&sctx->in_blocking_func, buf->in_blocking_func,
+            memory_order_relaxed);
+      }
+      atomic_store(&thr->in_signal_handler, buf->in_signal_handler,
+          memory_order_relaxed);
+      JmpBufGarbageCollect(thr, buf->sp - 1);  // do not collect buf->sp
+      return;
+    }
+  }
+  Printf("ThreadSanitizer: can't find longjmp buf\n");
+  CHECK(0);
+}
+
+// FIXME: put everything below into a common extern "C" block?
+extern "C" void __tsan_setjmp(uptr sp) { SetJmp(cur_thread_init(), sp); }
+
+#if SANITIZER_MAC
+TSAN_INTERCEPTOR(int, setjmp, void *env);
+TSAN_INTERCEPTOR(int, _setjmp, void *env);
+TSAN_INTERCEPTOR(int, sigsetjmp, void *env);
+#else  // SANITIZER_MAC
+
+#if SANITIZER_NETBSD
+#define setjmp_symname __setjmp14
+#define sigsetjmp_symname __sigsetjmp14
+#else
+#define setjmp_symname setjmp
+#define sigsetjmp_symname sigsetjmp
+#endif
+
+#define TSAN_INTERCEPTOR_SETJMP_(x) __interceptor_ ## x
+#define TSAN_INTERCEPTOR_SETJMP__(x) TSAN_INTERCEPTOR_SETJMP_(x)
+#define TSAN_INTERCEPTOR_SETJMP TSAN_INTERCEPTOR_SETJMP__(setjmp_symname)
+#define TSAN_INTERCEPTOR_SIGSETJMP TSAN_INTERCEPTOR_SETJMP__(sigsetjmp_symname)
+
+#define TSAN_STRING_SETJMP SANITIZER_STRINGIFY(setjmp_symname)
+#define TSAN_STRING_SIGSETJMP SANITIZER_STRINGIFY(sigsetjmp_symname)
+
+// Not called.  Merely to satisfy TSAN_INTERCEPT().
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+int TSAN_INTERCEPTOR_SETJMP(void *env);
+extern "C" int TSAN_INTERCEPTOR_SETJMP(void *env) {
+  CHECK(0);
+  return 0;
+}
+
+// FIXME: any reason to have a separate declaration?
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+int __interceptor__setjmp(void *env);
+extern "C" int __interceptor__setjmp(void *env) {
+  CHECK(0);
+  return 0;
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+int TSAN_INTERCEPTOR_SIGSETJMP(void *env);
+extern "C" int TSAN_INTERCEPTOR_SIGSETJMP(void *env) {
+  CHECK(0);
+  return 0;
+}
+
+#if !SANITIZER_NETBSD
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+int __interceptor___sigsetjmp(void *env);
+extern "C" int __interceptor___sigsetjmp(void *env) {
+  CHECK(0);
+  return 0;
+}
+#endif
+
+extern "C" int setjmp_symname(void *env);
+extern "C" int _setjmp(void *env);
+extern "C" int sigsetjmp_symname(void *env);
+#if !SANITIZER_NETBSD
+extern "C" int __sigsetjmp(void *env);
+#endif
+DEFINE_REAL(int, setjmp_symname, void *env)
+DEFINE_REAL(int, _setjmp, void *env)
+DEFINE_REAL(int, sigsetjmp_symname, void *env)
+#if !SANITIZER_NETBSD
+DEFINE_REAL(int, __sigsetjmp, void *env)
+#endif
+#endif  // SANITIZER_MAC
+
+#if SANITIZER_NETBSD
+#define longjmp_symname __longjmp14
+#define siglongjmp_symname __siglongjmp14
+#else
+#define longjmp_symname longjmp
+#define siglongjmp_symname siglongjmp
+#endif
+
+TSAN_INTERCEPTOR(void, longjmp_symname, uptr *env, int val) {
+  // Note: if we call REAL(longjmp) in the context of ScopedInterceptor,
+  // bad things will happen. We will jump over ScopedInterceptor dtor and can
+  // leave thr->in_ignored_lib set.
+  {
+    SCOPED_INTERCEPTOR_RAW(longjmp_symname, env, val);
+  }
+  LongJmp(cur_thread(), env);
+  REAL(longjmp_symname)(env, val);
+}
+
+TSAN_INTERCEPTOR(void, siglongjmp_symname, uptr *env, int val) {
+  {
+    SCOPED_INTERCEPTOR_RAW(siglongjmp_symname, env, val);
+  }
+  LongJmp(cur_thread(), env);
+  REAL(siglongjmp_symname)(env, val);
+}
+
+#if SANITIZER_NETBSD
+TSAN_INTERCEPTOR(void, _longjmp, uptr *env, int val) {
+  {
+    SCOPED_INTERCEPTOR_RAW(_longjmp, env, val);
+  }
+  LongJmp(cur_thread(), env);
+  REAL(_longjmp)(env, val);
+}
+#endif
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(void*, malloc, uptr size) {
+  if (in_symbolizer())
+    return InternalAlloc(size);
+  void *p = 0;
+  {
+    SCOPED_INTERCEPTOR_RAW(malloc, size);
+    p = user_alloc(thr, pc, size);
+  }
+  invoke_malloc_hook(p, size);
+  return p;
+}
+
+// In glibc<2.25, dynamic TLS blocks are allocated by __libc_memalign. Intercept
+// __libc_memalign so that (1) we can detect races (2) free will not be called
+// on libc internally allocated blocks.
+TSAN_INTERCEPTOR(void*, __libc_memalign, uptr align, uptr sz) {
+  SCOPED_INTERCEPTOR_RAW(__libc_memalign, align, sz);
+  return user_memalign(thr, pc, align, sz);
+}
+
+TSAN_INTERCEPTOR(void*, calloc, uptr size, uptr n) {
+  if (in_symbolizer())
+    return InternalCalloc(size, n);
+  void *p = 0;
+  {
+    SCOPED_INTERCEPTOR_RAW(calloc, size, n);
+    p = user_calloc(thr, pc, size, n);
+  }
+  invoke_malloc_hook(p, n * size);
+  return p;
+}
+
+TSAN_INTERCEPTOR(void*, realloc, void *p, uptr size) {
+  if (in_symbolizer())
+    return InternalRealloc(p, size);
+  if (p)
+    invoke_free_hook(p);
+  {
+    SCOPED_INTERCEPTOR_RAW(realloc, p, size);
+    p = user_realloc(thr, pc, p, size);
+  }
+  invoke_malloc_hook(p, size);
+  return p;
+}
+
+TSAN_INTERCEPTOR(void*, reallocarray, void *p, uptr size, uptr n) {
+  if (in_symbolizer())
+    return InternalReallocArray(p, size, n);
+  if (p)
+    invoke_free_hook(p);
+  {
+    SCOPED_INTERCEPTOR_RAW(reallocarray, p, size, n);
+    p = user_reallocarray(thr, pc, p, size, n);
+  }
+  invoke_malloc_hook(p, size);
+  return p;
+}
+
+TSAN_INTERCEPTOR(void, free, void *p) {
+  if (p == 0)
+    return;
+  if (in_symbolizer())
+    return InternalFree(p);
+  invoke_free_hook(p);
+  SCOPED_INTERCEPTOR_RAW(free, p);
+  user_free(thr, pc, p);
+}
+
+TSAN_INTERCEPTOR(void, cfree, void *p) {
+  if (p == 0)
+    return;
+  if (in_symbolizer())
+    return InternalFree(p);
+  invoke_free_hook(p);
+  SCOPED_INTERCEPTOR_RAW(cfree, p);
+  user_free(thr, pc, p);
+}
+
+TSAN_INTERCEPTOR(uptr, malloc_usable_size, void *p) {
+  SCOPED_INTERCEPTOR_RAW(malloc_usable_size, p);
+  return user_alloc_usable_size(p);
+}
+#endif
+
+TSAN_INTERCEPTOR(char *, strcpy, char *dst, const char *src) {
+  SCOPED_TSAN_INTERCEPTOR(strcpy, dst, src);
+  uptr srclen = internal_strlen(src);
+  MemoryAccessRange(thr, pc, (uptr)dst, srclen + 1, true);
+  MemoryAccessRange(thr, pc, (uptr)src, srclen + 1, false);
+  return REAL(strcpy)(dst, src);
+}
+
+TSAN_INTERCEPTOR(char*, strncpy, char *dst, char *src, uptr n) {
+  SCOPED_TSAN_INTERCEPTOR(strncpy, dst, src, n);
+  uptr srclen = internal_strnlen(src, n);
+  MemoryAccessRange(thr, pc, (uptr)dst, n, true);
+  MemoryAccessRange(thr, pc, (uptr)src, min(srclen + 1, n), false);
+  return REAL(strncpy)(dst, src, n);
+}
+
+TSAN_INTERCEPTOR(char*, strdup, const char *str) {
+  SCOPED_TSAN_INTERCEPTOR(strdup, str);
+  // strdup will call malloc, so no instrumentation is required here.
+  return REAL(strdup)(str);
+}
+
+// Zero out addr if it points into shadow memory and was provided as a hint
+// only, i.e., MAP_FIXED is not set.
+static bool fix_mmap_addr(void **addr, long_t sz, int flags) {
+  if (*addr) {
+    if (!IsAppMem((uptr)*addr) || !IsAppMem((uptr)*addr + sz - 1)) {
+      if (flags & MAP_FIXED) {
+        errno = errno_EINVAL;
+        return false;
+      } else {
+        *addr = 0;
+      }
+    }
+  }
+  return true;
+}
+
+template <class Mmap>
+static void *mmap_interceptor(ThreadState *thr, uptr pc, Mmap real_mmap,
+                              void *addr, SIZE_T sz, int prot, int flags,
+                              int fd, OFF64_T off) {
+  if (!fix_mmap_addr(&addr, sz, flags)) return MAP_FAILED;
+  void *res = real_mmap(addr, sz, prot, flags, fd, off);
+  if (res != MAP_FAILED) {
+    if (!IsAppMem((uptr)res) || !IsAppMem((uptr)res + sz - 1)) {
+      Report("ThreadSanitizer: mmap at bad address: addr=%p size=%p res=%p\n",
+             addr, (void*)sz, res);
+      Die();
+    }
+    if (fd > 0) FdAccess(thr, pc, fd);
+    MemoryRangeImitateWriteOrResetRange(thr, pc, (uptr)res, sz);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, munmap, void *addr, long_t sz) {
+  SCOPED_TSAN_INTERCEPTOR(munmap, addr, sz);
+  UnmapShadow(thr, (uptr)addr, sz);
+  int res = REAL(munmap)(addr, sz);
+  return res;
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(void*, memalign, uptr align, uptr sz) {
+  SCOPED_INTERCEPTOR_RAW(memalign, align, sz);
+  return user_memalign(thr, pc, align, sz);
+}
+#define TSAN_MAYBE_INTERCEPT_MEMALIGN TSAN_INTERCEPT(memalign)
+#else
+#define TSAN_MAYBE_INTERCEPT_MEMALIGN
+#endif
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(void*, aligned_alloc, uptr align, uptr sz) {
+  if (in_symbolizer())
+    return InternalAlloc(sz, nullptr, align);
+  SCOPED_INTERCEPTOR_RAW(aligned_alloc, align, sz);
+  return user_aligned_alloc(thr, pc, align, sz);
+}
+
+TSAN_INTERCEPTOR(void*, valloc, uptr sz) {
+  if (in_symbolizer())
+    return InternalAlloc(sz, nullptr, GetPageSizeCached());
+  SCOPED_INTERCEPTOR_RAW(valloc, sz);
+  return user_valloc(thr, pc, sz);
+}
+#endif
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(void*, pvalloc, uptr sz) {
+  if (in_symbolizer()) {
+    uptr PageSize = GetPageSizeCached();
+    sz = sz ? RoundUpTo(sz, PageSize) : PageSize;
+    return InternalAlloc(sz, nullptr, PageSize);
+  }
+  SCOPED_INTERCEPTOR_RAW(pvalloc, sz);
+  return user_pvalloc(thr, pc, sz);
+}
+#define TSAN_MAYBE_INTERCEPT_PVALLOC TSAN_INTERCEPT(pvalloc)
+#else
+#define TSAN_MAYBE_INTERCEPT_PVALLOC
+#endif
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, posix_memalign, void **memptr, uptr align, uptr sz) {
+  if (in_symbolizer()) {
+    void *p = InternalAlloc(sz, nullptr, align);
+    if (!p)
+      return errno_ENOMEM;
+    *memptr = p;
+    return 0;
+  }
+  SCOPED_INTERCEPTOR_RAW(posix_memalign, memptr, align, sz);
+  return user_posix_memalign(thr, pc, memptr, align, sz);
+}
+#endif
+
+// Both __cxa_guard_acquire and pthread_once 0-initialize
+// the object initially. pthread_once does not have any
+// other ABI requirements. __cxa_guard_acquire assumes
+// that any non-0 value in the first byte means that
+// initialization is completed. Contents of the remaining
+// bytes are up to us.
+constexpr u32 kGuardInit = 0;
+constexpr u32 kGuardDone = 1;
+constexpr u32 kGuardRunning = 1 << 16;
+constexpr u32 kGuardWaiter = 1 << 17;
+
+static int guard_acquire(ThreadState *thr, uptr pc, atomic_uint32_t *g,
+                         bool blocking_hooks = true) {
+  if (blocking_hooks)
+    OnPotentiallyBlockingRegionBegin();
+  auto on_exit = at_scope_exit([blocking_hooks] {
+    if (blocking_hooks)
+      OnPotentiallyBlockingRegionEnd();
+  });
+
+  for (;;) {
+    u32 cmp = atomic_load(g, memory_order_acquire);
+    if (cmp == kGuardInit) {
+      if (atomic_compare_exchange_strong(g, &cmp, kGuardRunning,
+                                         memory_order_relaxed))
+        return 1;
+    } else if (cmp == kGuardDone) {
+      if (!thr->in_ignored_lib)
+        Acquire(thr, pc, (uptr)g);
+      return 0;
+    } else {
+      if ((cmp & kGuardWaiter) ||
+          atomic_compare_exchange_strong(g, &cmp, cmp | kGuardWaiter,
+                                         memory_order_relaxed))
+        FutexWait(g, cmp | kGuardWaiter);
+    }
+  }
+}
+
+static void guard_release(ThreadState *thr, uptr pc, atomic_uint32_t *g,
+                          u32 v) {
+  if (!thr->in_ignored_lib)
+    Release(thr, pc, (uptr)g);
+  u32 old = atomic_exchange(g, v, memory_order_release);
+  if (old & kGuardWaiter)
+    FutexWake(g, 1 << 30);
+}
+
+// __cxa_guard_acquire and friends need to be intercepted in a special way -
+// regular interceptors will break statically-linked libstdc++. Linux
+// interceptors are especially defined as weak functions (so that they don't
+// cause link errors when user defines them as well). So they silently
+// auto-disable themselves when such symbol is already present in the binary. If
+// we link libstdc++ statically, it will bring own __cxa_guard_acquire which
+// will silently replace our interceptor.  That's why on Linux we simply export
+// these interceptors with INTERFACE_ATTRIBUTE.
+// On OS X, we don't support statically linking, so we just use a regular
+// interceptor.
+#if SANITIZER_MAC
+#define STDCXX_INTERCEPTOR TSAN_INTERCEPTOR
+#else
+#define STDCXX_INTERCEPTOR(rettype, name, ...) \
+  extern "C" rettype INTERFACE_ATTRIBUTE name(__VA_ARGS__)
+#endif
+
+// Used in thread-safe function static initialization.
+STDCXX_INTERCEPTOR(int, __cxa_guard_acquire, atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_acquire, g);
+  return guard_acquire(thr, pc, g);
+}
+
+STDCXX_INTERCEPTOR(void, __cxa_guard_release, atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_release, g);
+  guard_release(thr, pc, g, kGuardDone);
+}
+
+STDCXX_INTERCEPTOR(void, __cxa_guard_abort, atomic_uint32_t *g) {
+  SCOPED_INTERCEPTOR_RAW(__cxa_guard_abort, g);
+  guard_release(thr, pc, g, kGuardInit);
+}
+
+namespace __tsan {
+void DestroyThreadState() {
+  ThreadState *thr = cur_thread();
+  Processor *proc = thr->proc();
+  ThreadFinish(thr);
+  ProcUnwire(proc, thr);
+  ProcDestroy(proc);
+  DTLS_Destroy();
+  cur_thread_finalize();
+}
+
+void PlatformCleanUpThreadState(ThreadState *thr) {
+  ThreadSignalContext *sctx = thr->signal_ctx;
+  if (sctx) {
+    thr->signal_ctx = 0;
+    UnmapOrDie(sctx, sizeof(*sctx));
+  }
+}
+}  // namespace __tsan
+
+#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
+static void thread_finalize(void *v) {
+  uptr iter = (uptr)v;
+  if (iter > 1) {
+    if (pthread_setspecific(interceptor_ctx()->finalize_key,
+        (void*)(iter - 1))) {
+      Printf("ThreadSanitizer: failed to set thread key\n");
+      Die();
+    }
+    return;
+  }
+  DestroyThreadState();
+}
+#endif
+
+
+struct ThreadParam {
+  void* (*callback)(void *arg);
+  void *param;
+  Tid tid;
+  Semaphore created;
+  Semaphore started;
+};
+
+extern "C" void *__tsan_thread_start_func(void *arg) {
+  ThreadParam *p = (ThreadParam*)arg;
+  void* (*callback)(void *arg) = p->callback;
+  void *param = p->param;
+  {
+    ThreadState *thr = cur_thread_init();
+    // Thread-local state is not initialized yet.
+    ScopedIgnoreInterceptors ignore;
+#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
+    ThreadIgnoreBegin(thr, 0);
+    if (pthread_setspecific(interceptor_ctx()->finalize_key,
+                            (void *)GetPthreadDestructorIterations())) {
+      Printf("ThreadSanitizer: failed to set thread key\n");
+      Die();
+    }
+    ThreadIgnoreEnd(thr);
+#endif
+    p->created.Wait();
+    Processor *proc = ProcCreate();
+    ProcWire(proc, thr);
+    ThreadStart(thr, p->tid, GetTid(), ThreadType::Regular);
+    p->started.Post();
+  }
+  void *res = callback(param);
+  // Prevent the callback from being tail called,
+  // it mixes up stack traces.
+  volatile int foo = 42;
+  foo++;
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_create,
+    void *th, void *attr, void *(*callback)(void*), void * param) {
+  SCOPED_INTERCEPTOR_RAW(pthread_create, th, attr, callback, param);
+
+  MaybeSpawnBackgroundThread();
+
+  if (ctx->after_multithreaded_fork) {
+    if (flags()->die_after_fork) {
+      Report("ThreadSanitizer: starting new threads after multi-threaded "
+          "fork is not supported. Dying (set die_after_fork=0 to override)\n");
+      Die();
+    } else {
+      VPrintf(1,
+              "ThreadSanitizer: starting new threads after multi-threaded "
+              "fork is not supported (pid %lu). Continuing because of "
+              "die_after_fork=0, but you are on your own\n",
+              internal_getpid());
+    }
+  }
+  __sanitizer_pthread_attr_t myattr;
+  if (attr == 0) {
+    pthread_attr_init(&myattr);
+    attr = &myattr;
+  }
+  int detached = 0;
+  REAL(pthread_attr_getdetachstate)(attr, &detached);
+  AdjustStackSize(attr);
+
+  ThreadParam p;
+  p.callback = callback;
+  p.param = param;
+  p.tid = kMainTid;
+  int res = -1;
+  {
+    // Otherwise we see false positives in pthread stack manipulation.
+    ScopedIgnoreInterceptors ignore;
+    ThreadIgnoreBegin(thr, pc);
+    res = REAL(pthread_create)(th, attr, __tsan_thread_start_func, &p);
+    ThreadIgnoreEnd(thr);
+  }
+  if (res == 0) {
+    p.tid = ThreadCreate(thr, pc, *(uptr *)th, IsStateDetached(detached));
+    CHECK_NE(p.tid, kMainTid);
+    // Synchronization on p.tid serves two purposes:
+    // 1. ThreadCreate must finish before the new thread starts.
+    //    Otherwise the new thread can call pthread_detach, but the pthread_t
+    //    identifier is not yet registered in ThreadRegistry by ThreadCreate.
+    // 2. ThreadStart must finish before this thread continues.
+    //    Otherwise, this thread can call pthread_detach and reset thr->sync
+    //    before the new thread got a chance to acquire from it in ThreadStart.
+    p.created.Post();
+    p.started.Wait();
+  }
+  if (attr == &myattr)
+    pthread_attr_destroy(&myattr);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
+  SCOPED_INTERCEPTOR_RAW(pthread_join, th, ret);
+  Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
+  ThreadIgnoreBegin(thr, pc);
+  int res = BLOCK_REAL(pthread_join)(th, ret);
+  ThreadIgnoreEnd(thr);
+  if (res == 0) {
+    ThreadJoin(thr, pc, tid);
+  }
+  return res;
+}
+
+DEFINE_REAL_PTHREAD_FUNCTIONS
+
+TSAN_INTERCEPTOR(int, pthread_detach, void *th) {
+  SCOPED_INTERCEPTOR_RAW(pthread_detach, th);
+  Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
+  int res = REAL(pthread_detach)(th);
+  if (res == 0) {
+    ThreadDetach(thr, pc, tid);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(void, pthread_exit, void *retval) {
+  {
+    SCOPED_INTERCEPTOR_RAW(pthread_exit, retval);
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+    CHECK_EQ(thr, &cur_thread_placeholder);
+#endif
+  }
+  REAL(pthread_exit)(retval);
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, pthread_tryjoin_np, void *th, void **ret) {
+  SCOPED_INTERCEPTOR_RAW(pthread_tryjoin_np, th, ret);
+  Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
+  ThreadIgnoreBegin(thr, pc);
+  int res = REAL(pthread_tryjoin_np)(th, ret);
+  ThreadIgnoreEnd(thr);
+  if (res == 0)
+    ThreadJoin(thr, pc, tid);
+  else
+    ThreadNotJoined(thr, pc, tid, (uptr)th);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_timedjoin_np, void *th, void **ret,
+                 const struct timespec *abstime) {
+  SCOPED_INTERCEPTOR_RAW(pthread_timedjoin_np, th, ret, abstime);
+  Tid tid = ThreadConsumeTid(thr, pc, (uptr)th);
+  ThreadIgnoreBegin(thr, pc);
+  int res = BLOCK_REAL(pthread_timedjoin_np)(th, ret, abstime);
+  ThreadIgnoreEnd(thr);
+  if (res == 0)
+    ThreadJoin(thr, pc, tid);
+  else
+    ThreadNotJoined(thr, pc, tid, (uptr)th);
+  return res;
+}
+#endif
+
+// Problem:
+// NPTL implementation of pthread_cond has 2 versions (2.2.5 and 2.3.2).
+// pthread_cond_t has 
diff erent size in the 
diff erent versions.
+// If call new REAL functions for old pthread_cond_t, they will corrupt memory
+// after pthread_cond_t (old cond is smaller).
+// If we call old REAL functions for new pthread_cond_t, we will lose  some
+// functionality (e.g. old functions do not support waiting against
+// CLOCK_REALTIME).
+// Proper handling would require to have 2 versions of interceptors as well.
+// But this is messy, in particular requires linker scripts when sanitizer
+// runtime is linked into a shared library.
+// Instead we assume we don't have dynamic libraries built against old
+// pthread (2.2.5 is dated by 2002). And provide legacy_pthread_cond flag
+// that allows to work with old libraries (but this mode does not support
+// some features, e.g. pthread_condattr_getpshared).
+static void *init_cond(void *c, bool force = false) {
+  // sizeof(pthread_cond_t) >= sizeof(uptr) in both versions.
+  // So we allocate additional memory on the side large enough to hold
+  // any pthread_cond_t object. Always call new REAL functions, but pass
+  // the aux object to them.
+  // Note: the code assumes that PTHREAD_COND_INITIALIZER initializes
+  // first word of pthread_cond_t to zero.
+  // It's all relevant only for linux.
+  if (!common_flags()->legacy_pthread_cond)
+    return c;
+  atomic_uintptr_t *p = (atomic_uintptr_t*)c;
+  uptr cond = atomic_load(p, memory_order_acquire);
+  if (!force && cond != 0)
+    return (void*)cond;
+  void *newcond = WRAP(malloc)(pthread_cond_t_sz);
+  internal_memset(newcond, 0, pthread_cond_t_sz);
+  if (atomic_compare_exchange_strong(p, &cond, (uptr)newcond,
+      memory_order_acq_rel))
+    return newcond;
+  WRAP(free)(newcond);
+  return (void*)cond;
+}
+
+namespace {
+
+template <class Fn>
+struct CondMutexUnlockCtx {
+  ScopedInterceptor *si;
+  ThreadState *thr;
+  uptr pc;
+  void *m;
+  void *c;
+  const Fn &fn;
+
+  int Cancel() const { return fn(); }
+  void Unlock() const;
+};
+
+template <class Fn>
+void CondMutexUnlockCtx<Fn>::Unlock() const {
+  // pthread_cond_wait interceptor has enabled async signal delivery
+  // (see BlockingCall below). Disable async signals since we are running
+  // tsan code. Also ScopedInterceptor and BlockingCall destructors won't run
+  // since the thread is cancelled, so we have to manually execute them
+  // (the thread still can run some user code due to pthread_cleanup_push).
+  ThreadSignalContext *ctx = SigCtx(thr);
+  CHECK_EQ(atomic_load(&ctx->in_blocking_func, memory_order_relaxed), 1);
+  atomic_store(&ctx->in_blocking_func, 0, memory_order_relaxed);
+  MutexPostLock(thr, pc, (uptr)m, MutexFlagDoPreLockOnPostLock);
+  // Undo BlockingCall ctor effects.
+  thr->ignore_interceptors--;
+  si->~ScopedInterceptor();
+}
+}  // namespace
+
+INTERCEPTOR(int, pthread_cond_init, void *c, void *a) {
+  void *cond = init_cond(c, true);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_init, cond, a);
+  MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), true);
+  return REAL(pthread_cond_init)(cond, a);
+}
+
+template <class Fn>
+int cond_wait(ThreadState *thr, uptr pc, ScopedInterceptor *si, const Fn &fn,
+              void *c, void *m) {
+  MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), false);
+  MutexUnlock(thr, pc, (uptr)m);
+  int res = 0;
+  // This ensures that we handle mutex lock even in case of pthread_cancel.
+  // See test/tsan/cond_cancel.cpp.
+  {
+    // Enable signal delivery while the thread is blocked.
+    BlockingCall bc(thr);
+    CondMutexUnlockCtx<Fn> arg = {si, thr, pc, m, c, fn};
+    res = call_pthread_cancel_with_cleanup(
+        [](void *arg) -> int {
+          return ((const CondMutexUnlockCtx<Fn> *)arg)->Cancel();
+        },
+        [](void *arg) { ((const CondMutexUnlockCtx<Fn> *)arg)->Unlock(); },
+        &arg);
+  }
+  if (res == errno_EOWNERDEAD) MutexRepair(thr, pc, (uptr)m);
+  MutexPostLock(thr, pc, (uptr)m, MutexFlagDoPreLockOnPostLock);
+  return res;
+}
+
+INTERCEPTOR(int, pthread_cond_wait, void *c, void *m) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_wait, cond, m);
+  return cond_wait(
+      thr, pc, &si, [=]() { return REAL(pthread_cond_wait)(cond, m); }, cond,
+      m);
+}
+
+INTERCEPTOR(int, pthread_cond_timedwait, void *c, void *m, void *abstime) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_timedwait, cond, m, abstime);
+  return cond_wait(
+      thr, pc, &si,
+      [=]() { return REAL(pthread_cond_timedwait)(cond, m, abstime); }, cond,
+      m);
+}
+
+#if SANITIZER_LINUX
+INTERCEPTOR(int, pthread_cond_clockwait, void *c, void *m,
+            __sanitizer_clockid_t clock, void *abstime) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_clockwait, cond, m, clock, abstime);
+  return cond_wait(
+      thr, pc, &si,
+      [=]() { return REAL(pthread_cond_clockwait)(cond, m, clock, abstime); },
+      cond, m);
+}
+#define TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT TSAN_INTERCEPT(pthread_cond_clockwait)
+#else
+#define TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT
+#endif
+
+#if SANITIZER_MAC
+INTERCEPTOR(int, pthread_cond_timedwait_relative_np, void *c, void *m,
+            void *reltime) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_timedwait_relative_np, cond, m, reltime);
+  return cond_wait(
+      thr, pc, &si,
+      [=]() {
+        return REAL(pthread_cond_timedwait_relative_np)(cond, m, reltime);
+      },
+      cond, m);
+}
+#endif
+
+INTERCEPTOR(int, pthread_cond_signal, void *c) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_signal, cond);
+  MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), false);
+  return REAL(pthread_cond_signal)(cond);
+}
+
+INTERCEPTOR(int, pthread_cond_broadcast, void *c) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_broadcast, cond);
+  MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), false);
+  return REAL(pthread_cond_broadcast)(cond);
+}
+
+INTERCEPTOR(int, pthread_cond_destroy, void *c) {
+  void *cond = init_cond(c);
+  SCOPED_TSAN_INTERCEPTOR(pthread_cond_destroy, cond);
+  MemoryAccessRange(thr, pc, (uptr)c, sizeof(uptr), true);
+  int res = REAL(pthread_cond_destroy)(cond);
+  if (common_flags()->legacy_pthread_cond) {
+    // Free our aux cond and zero the pointer to not leave dangling pointers.
+    WRAP(free)(cond);
+    atomic_store((atomic_uintptr_t*)c, 0, memory_order_relaxed);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_init, void *m, void *a) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_init, m, a);
+  int res = REAL(pthread_mutex_init)(m, a);
+  if (res == 0) {
+    u32 flagz = 0;
+    if (a) {
+      int type = 0;
+      if (REAL(pthread_mutexattr_gettype)(a, &type) == 0)
+        if (type == PTHREAD_MUTEX_RECURSIVE ||
+            type == PTHREAD_MUTEX_RECURSIVE_NP)
+          flagz |= MutexFlagWriteReentrant;
+    }
+    MutexCreate(thr, pc, (uptr)m, flagz);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_destroy, m);
+  int res = REAL(pthread_mutex_destroy)(m);
+  if (res == 0 || res == errno_EBUSY) {
+    MutexDestroy(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_mutex_trylock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_trylock, m);
+  int res = REAL(pthread_mutex_trylock)(m);
+  if (res == errno_EOWNERDEAD)
+    MutexRepair(thr, pc, (uptr)m);
+  if (res == 0 || res == errno_EOWNERDEAD)
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  return res;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pthread_mutex_timedlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_mutex_timedlock, m, abstime);
+  int res = REAL(pthread_mutex_timedlock)(m, abstime);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  }
+  return res;
+}
+#endif
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pthread_spin_init, void *m, int pshared) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_init, m, pshared);
+  int res = REAL(pthread_spin_init)(m, pshared);
+  if (res == 0) {
+    MutexCreate(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_destroy, m);
+  int res = REAL(pthread_spin_destroy)(m);
+  if (res == 0) {
+    MutexDestroy(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_lock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_lock, m);
+  MutexPreLock(thr, pc, (uptr)m);
+  int res = REAL(pthread_spin_lock)(m);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_trylock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_trylock, m);
+  int res = REAL(pthread_spin_trylock)(m);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_spin_unlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_spin_unlock, m);
+  MutexUnlock(thr, pc, (uptr)m);
+  int res = REAL(pthread_spin_unlock)(m);
+  return res;
+}
+#endif
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_init, void *m, void *a) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_init, m, a);
+  int res = REAL(pthread_rwlock_init)(m, a);
+  if (res == 0) {
+    MutexCreate(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_destroy, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_destroy, m);
+  int res = REAL(pthread_rwlock_destroy)(m);
+  if (res == 0) {
+    MutexDestroy(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_rdlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_rdlock, m);
+  MutexPreReadLock(thr, pc, (uptr)m);
+  int res = REAL(pthread_rwlock_rdlock)(m);
+  if (res == 0) {
+    MutexPostReadLock(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_tryrdlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_tryrdlock, m);
+  int res = REAL(pthread_rwlock_tryrdlock)(m);
+  if (res == 0) {
+    MutexPostReadLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  }
+  return res;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pthread_rwlock_timedrdlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedrdlock, m, abstime);
+  int res = REAL(pthread_rwlock_timedrdlock)(m, abstime);
+  if (res == 0) {
+    MutexPostReadLock(thr, pc, (uptr)m);
+  }
+  return res;
+}
+#endif
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_wrlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_wrlock, m);
+  MutexPreLock(thr, pc, (uptr)m);
+  int res = REAL(pthread_rwlock_wrlock)(m);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m);
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_trywrlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_trywrlock, m);
+  int res = REAL(pthread_rwlock_trywrlock)(m);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  }
+  return res;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pthread_rwlock_timedwrlock, void *m, void *abstime) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_timedwrlock, m, abstime);
+  int res = REAL(pthread_rwlock_timedwrlock)(m, abstime);
+  if (res == 0) {
+    MutexPostLock(thr, pc, (uptr)m, MutexFlagTryLock);
+  }
+  return res;
+}
+#endif
+
+TSAN_INTERCEPTOR(int, pthread_rwlock_unlock, void *m) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_rwlock_unlock, m);
+  MutexReadOrWriteUnlock(thr, pc, (uptr)m);
+  int res = REAL(pthread_rwlock_unlock)(m);
+  return res;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pthread_barrier_init, void *b, void *a, unsigned count) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_init, b, a, count);
+  MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite);
+  int res = REAL(pthread_barrier_init)(b, a, count);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_barrier_destroy, void *b) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_destroy, b);
+  MemoryAccess(thr, pc, (uptr)b, 1, kAccessWrite);
+  int res = REAL(pthread_barrier_destroy)(b);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_barrier_wait, void *b) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_barrier_wait, b);
+  Release(thr, pc, (uptr)b);
+  MemoryAccess(thr, pc, (uptr)b, 1, kAccessRead);
+  int res = REAL(pthread_barrier_wait)(b);
+  MemoryAccess(thr, pc, (uptr)b, 1, kAccessRead);
+  if (res == 0 || res == PTHREAD_BARRIER_SERIAL_THREAD) {
+    Acquire(thr, pc, (uptr)b);
+  }
+  return res;
+}
+#endif
+
+TSAN_INTERCEPTOR(int, pthread_once, void *o, void (*f)()) {
+  SCOPED_INTERCEPTOR_RAW(pthread_once, o, f);
+  if (o == 0 || f == 0)
+    return errno_EINVAL;
+  atomic_uint32_t *a;
+
+  if (SANITIZER_MAC)
+    a = static_cast<atomic_uint32_t*>((void *)((char *)o + sizeof(long_t)));
+  else if (SANITIZER_NETBSD)
+    a = static_cast<atomic_uint32_t*>
+          ((void *)((char *)o + __sanitizer::pthread_mutex_t_sz));
+  else
+    a = static_cast<atomic_uint32_t*>(o);
+
+  // Mac OS X appears to use pthread_once() where calling BlockingRegion hooks
+  // result in crashes due to too little stack space.
+  if (guard_acquire(thr, pc, a, !SANITIZER_MAC)) {
+    (*f)();
+    guard_release(thr, pc, a, kGuardDone);
+  }
+  return 0;
+}
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+TSAN_INTERCEPTOR(int, __fxstat, int version, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat, version, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat)(version, fd, buf);
+}
+#define TSAN_MAYBE_INTERCEPT___FXSTAT TSAN_INTERCEPT(__fxstat)
+#else
+#define TSAN_MAYBE_INTERCEPT___FXSTAT
+#endif
+
+TSAN_INTERCEPTOR(int, fstat, int fd, void *buf) {
+#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_ANDROID || SANITIZER_NETBSD
+  SCOPED_TSAN_INTERCEPTOR(fstat, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(fstat)(fd, buf);
+#else
+  SCOPED_TSAN_INTERCEPTOR(__fxstat, 0, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat)(0, fd, buf);
+#endif
+}
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+TSAN_INTERCEPTOR(int, __fxstat64, int version, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat64, version, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat64)(version, fd, buf);
+}
+#define TSAN_MAYBE_INTERCEPT___FXSTAT64 TSAN_INTERCEPT(__fxstat64)
+#else
+#define TSAN_MAYBE_INTERCEPT___FXSTAT64
+#endif
+
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+TSAN_INTERCEPTOR(int, fstat64, int fd, void *buf) {
+  SCOPED_TSAN_INTERCEPTOR(__fxstat64, 0, fd, buf);
+  if (fd > 0)
+    FdAccess(thr, pc, fd);
+  return REAL(__fxstat64)(0, fd, buf);
+}
+#define TSAN_MAYBE_INTERCEPT_FSTAT64 TSAN_INTERCEPT(fstat64)
+#else
+#define TSAN_MAYBE_INTERCEPT_FSTAT64
+#endif
+
+TSAN_INTERCEPTOR(int, open, const char *name, int oflag, ...) {
+  va_list ap;
+  va_start(ap, oflag);
+  mode_t mode = va_arg(ap, int);
+  va_end(ap);
+  SCOPED_TSAN_INTERCEPTOR(open, name, oflag, mode);
+  READ_STRING(thr, pc, name, 0);
+  int fd = REAL(open)(name, oflag, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, open64, const char *name, int oflag, ...) {
+  va_list ap;
+  va_start(ap, oflag);
+  mode_t mode = va_arg(ap, int);
+  va_end(ap);
+  SCOPED_TSAN_INTERCEPTOR(open64, name, oflag, mode);
+  READ_STRING(thr, pc, name, 0);
+  int fd = REAL(open64)(name, oflag, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_OPEN64 TSAN_INTERCEPT(open64)
+#else
+#define TSAN_MAYBE_INTERCEPT_OPEN64
+#endif
+
+TSAN_INTERCEPTOR(int, creat, const char *name, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(creat, name, mode);
+  READ_STRING(thr, pc, name, 0);
+  int fd = REAL(creat)(name, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, creat64, const char *name, int mode) {
+  SCOPED_TSAN_INTERCEPTOR(creat64, name, mode);
+  READ_STRING(thr, pc, name, 0);
+  int fd = REAL(creat64)(name, mode);
+  if (fd >= 0)
+    FdFileCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_CREAT64 TSAN_INTERCEPT(creat64)
+#else
+#define TSAN_MAYBE_INTERCEPT_CREAT64
+#endif
+
+TSAN_INTERCEPTOR(int, dup, int oldfd) {
+  SCOPED_TSAN_INTERCEPTOR(dup, oldfd);
+  int newfd = REAL(dup)(oldfd);
+  if (oldfd >= 0 && newfd >= 0 && newfd != oldfd)
+    FdDup(thr, pc, oldfd, newfd, true);
+  return newfd;
+}
+
+TSAN_INTERCEPTOR(int, dup2, int oldfd, int newfd) {
+  SCOPED_TSAN_INTERCEPTOR(dup2, oldfd, newfd);
+  int newfd2 = REAL(dup2)(oldfd, newfd);
+  if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
+    FdDup(thr, pc, oldfd, newfd2, false);
+  return newfd2;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, dup3, int oldfd, int newfd, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(dup3, oldfd, newfd, flags);
+  int newfd2 = REAL(dup3)(oldfd, newfd, flags);
+  if (oldfd >= 0 && newfd2 >= 0 && newfd2 != oldfd)
+    FdDup(thr, pc, oldfd, newfd2, false);
+  return newfd2;
+}
+#endif
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, eventfd, unsigned initval, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(eventfd, initval, flags);
+  int fd = REAL(eventfd)(initval, flags);
+  if (fd >= 0)
+    FdEventCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_EVENTFD TSAN_INTERCEPT(eventfd)
+#else
+#define TSAN_MAYBE_INTERCEPT_EVENTFD
+#endif
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, signalfd, int fd, void *mask, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(signalfd, fd, mask, flags);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
+  fd = REAL(signalfd)(fd, mask, flags);
+  if (fd >= 0)
+    FdSignalCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_SIGNALFD TSAN_INTERCEPT(signalfd)
+#else
+#define TSAN_MAYBE_INTERCEPT_SIGNALFD
+#endif
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, inotify_init, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(inotify_init, fake);
+  int fd = REAL(inotify_init)(fake);
+  if (fd >= 0)
+    FdInotifyCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_INOTIFY_INIT TSAN_INTERCEPT(inotify_init)
+#else
+#define TSAN_MAYBE_INTERCEPT_INOTIFY_INIT
+#endif
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, inotify_init1, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(inotify_init1, flags);
+  int fd = REAL(inotify_init1)(flags);
+  if (fd >= 0)
+    FdInotifyCreate(thr, pc, fd);
+  return fd;
+}
+#define TSAN_MAYBE_INTERCEPT_INOTIFY_INIT1 TSAN_INTERCEPT(inotify_init1)
+#else
+#define TSAN_MAYBE_INTERCEPT_INOTIFY_INIT1
+#endif
+
+TSAN_INTERCEPTOR(int, socket, int domain, int type, int protocol) {
+  SCOPED_TSAN_INTERCEPTOR(socket, domain, type, protocol);
+  int fd = REAL(socket)(domain, type, protocol);
+  if (fd >= 0)
+    FdSocketCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, socketpair, int domain, int type, int protocol, int *fd) {
+  SCOPED_TSAN_INTERCEPTOR(socketpair, domain, type, protocol, fd);
+  int res = REAL(socketpair)(domain, type, protocol, fd);
+  if (res == 0 && fd[0] >= 0 && fd[1] >= 0)
+    FdPipeCreate(thr, pc, fd[0], fd[1]);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, connect, int fd, void *addr, unsigned addrlen) {
+  SCOPED_TSAN_INTERCEPTOR(connect, fd, addr, addrlen);
+  FdSocketConnecting(thr, pc, fd);
+  int res = REAL(connect)(fd, addr, addrlen);
+  if (res == 0 && fd >= 0)
+    FdSocketConnect(thr, pc, fd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, bind, int fd, void *addr, unsigned addrlen) {
+  SCOPED_TSAN_INTERCEPTOR(bind, fd, addr, addrlen);
+  int res = REAL(bind)(fd, addr, addrlen);
+  if (fd > 0 && res == 0)
+    FdAccess(thr, pc, fd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, listen, int fd, int backlog) {
+  SCOPED_TSAN_INTERCEPTOR(listen, fd, backlog);
+  int res = REAL(listen)(fd, backlog);
+  if (fd > 0 && res == 0)
+    FdAccess(thr, pc, fd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, close, int fd) {
+  SCOPED_TSAN_INTERCEPTOR(close, fd);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
+  return REAL(close)(fd);
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, __close, int fd) {
+  SCOPED_TSAN_INTERCEPTOR(__close, fd);
+  if (fd >= 0)
+    FdClose(thr, pc, fd);
+  return REAL(__close)(fd);
+}
+#define TSAN_MAYBE_INTERCEPT___CLOSE TSAN_INTERCEPT(__close)
+#else
+#define TSAN_MAYBE_INTERCEPT___CLOSE
+#endif
+
+// glibc guts
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+TSAN_INTERCEPTOR(void, __res_iclose, void *state, bool free_addr) {
+  SCOPED_TSAN_INTERCEPTOR(__res_iclose, state, free_addr);
+  int fds[64];
+  int cnt = ExtractResolvFDs(state, fds, ARRAY_SIZE(fds));
+  for (int i = 0; i < cnt; i++) {
+    if (fds[i] > 0)
+      FdClose(thr, pc, fds[i]);
+  }
+  REAL(__res_iclose)(state, free_addr);
+}
+#define TSAN_MAYBE_INTERCEPT___RES_ICLOSE TSAN_INTERCEPT(__res_iclose)
+#else
+#define TSAN_MAYBE_INTERCEPT___RES_ICLOSE
+#endif
+
+TSAN_INTERCEPTOR(int, pipe, int *pipefd) {
+  SCOPED_TSAN_INTERCEPTOR(pipe, pipefd);
+  int res = REAL(pipe)(pipefd);
+  if (res == 0 && pipefd[0] >= 0 && pipefd[1] >= 0)
+    FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
+  return res;
+}
+
+#if !SANITIZER_MAC
+TSAN_INTERCEPTOR(int, pipe2, int *pipefd, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(pipe2, pipefd, flags);
+  int res = REAL(pipe2)(pipefd, flags);
+  if (res == 0 && pipefd[0] >= 0 && pipefd[1] >= 0)
+    FdPipeCreate(thr, pc, pipefd[0], pipefd[1]);
+  return res;
+}
+#endif
+
+TSAN_INTERCEPTOR(int, unlink, char *path) {
+  SCOPED_TSAN_INTERCEPTOR(unlink, path);
+  Release(thr, pc, File2addr(path));
+  int res = REAL(unlink)(path);
+  return res;
+}
+
+TSAN_INTERCEPTOR(void*, tmpfile, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(tmpfile, fake);
+  void *res = REAL(tmpfile)(fake);
+  if (res) {
+    int fd = fileno_unlocked(res);
+    if (fd >= 0)
+      FdFileCreate(thr, pc, fd);
+  }
+  return res;
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(void*, tmpfile64, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(tmpfile64, fake);
+  void *res = REAL(tmpfile64)(fake);
+  if (res) {
+    int fd = fileno_unlocked(res);
+    if (fd >= 0)
+      FdFileCreate(thr, pc, fd);
+  }
+  return res;
+}
+#define TSAN_MAYBE_INTERCEPT_TMPFILE64 TSAN_INTERCEPT(tmpfile64)
+#else
+#define TSAN_MAYBE_INTERCEPT_TMPFILE64
+#endif
+
+static void FlushStreams() {
+  // Flushing all the streams here may freeze the process if a child thread is
+  // performing file stream operations at the same time.
+  REAL(fflush)(stdout);
+  REAL(fflush)(stderr);
+}
+
+TSAN_INTERCEPTOR(void, abort, int fake) {
+  SCOPED_TSAN_INTERCEPTOR(abort, fake);
+  FlushStreams();
+  REAL(abort)(fake);
+}
+
+TSAN_INTERCEPTOR(int, rmdir, char *path) {
+  SCOPED_TSAN_INTERCEPTOR(rmdir, path);
+  Release(thr, pc, Dir2addr(path));
+  int res = REAL(rmdir)(path);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, closedir, void *dirp) {
+  SCOPED_TSAN_INTERCEPTOR(closedir, dirp);
+  if (dirp) {
+    int fd = dirfd(dirp);
+    FdClose(thr, pc, fd);
+  }
+  return REAL(closedir)(dirp);
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, epoll_create, int size) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create, size);
+  int fd = REAL(epoll_create)(size);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, epoll_create1, int flags) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_create1, flags);
+  int fd = REAL(epoll_create1)(flags);
+  if (fd >= 0)
+    FdPollCreate(thr, pc, fd);
+  return fd;
+}
+
+TSAN_INTERCEPTOR(int, epoll_ctl, int epfd, int op, int fd, void *ev) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_ctl, epfd, op, fd, ev);
+  if (epfd >= 0)
+    FdAccess(thr, pc, epfd);
+  if (epfd >= 0 && fd >= 0)
+    FdAccess(thr, pc, fd);
+  if (op == EPOLL_CTL_ADD && epfd >= 0)
+    FdRelease(thr, pc, epfd);
+  int res = REAL(epoll_ctl)(epfd, op, fd, ev);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, epoll_wait, int epfd, void *ev, int cnt, int timeout) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_wait, epfd, ev, cnt, timeout);
+  if (epfd >= 0)
+    FdAccess(thr, pc, epfd);
+  int res = BLOCK_REAL(epoll_wait)(epfd, ev, cnt, timeout);
+  if (res > 0 && epfd >= 0)
+    FdAcquire(thr, pc, epfd);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, epoll_pwait, int epfd, void *ev, int cnt, int timeout,
+                 void *sigmask) {
+  SCOPED_TSAN_INTERCEPTOR(epoll_pwait, epfd, ev, cnt, timeout, sigmask);
+  if (epfd >= 0)
+    FdAccess(thr, pc, epfd);
+  int res = BLOCK_REAL(epoll_pwait)(epfd, ev, cnt, timeout, sigmask);
+  if (res > 0 && epfd >= 0)
+    FdAcquire(thr, pc, epfd);
+  return res;
+}
+
+#define TSAN_MAYBE_INTERCEPT_EPOLL \
+    TSAN_INTERCEPT(epoll_create); \
+    TSAN_INTERCEPT(epoll_create1); \
+    TSAN_INTERCEPT(epoll_ctl); \
+    TSAN_INTERCEPT(epoll_wait); \
+    TSAN_INTERCEPT(epoll_pwait)
+#else
+#define TSAN_MAYBE_INTERCEPT_EPOLL
+#endif
+
+// The following functions are intercepted merely to process pending signals.
+// If program blocks signal X, we must deliver the signal before the function
+// returns. Similarly, if program unblocks a signal (or returns from sigsuspend)
+// it's better to deliver the signal straight away.
+TSAN_INTERCEPTOR(int, sigsuspend, const __sanitizer_sigset_t *mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigsuspend, mask);
+  return REAL(sigsuspend)(mask);
+}
+
+TSAN_INTERCEPTOR(int, sigblock, int mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigblock, mask);
+  return REAL(sigblock)(mask);
+}
+
+TSAN_INTERCEPTOR(int, sigsetmask, int mask) {
+  SCOPED_TSAN_INTERCEPTOR(sigsetmask, mask);
+  return REAL(sigsetmask)(mask);
+}
+
+TSAN_INTERCEPTOR(int, pthread_sigmask, int how, const __sanitizer_sigset_t *set,
+    __sanitizer_sigset_t *oldset) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_sigmask, how, set, oldset);
+  return REAL(pthread_sigmask)(how, set, oldset);
+}
+
+namespace __tsan {
+
+static void ReportErrnoSpoiling(ThreadState *thr, uptr pc) {
+  VarSizeStackTrace stack;
+  // StackTrace::GetNestInstructionPc(pc) is used because return address is
+  // expected, OutputReport() will undo this.
+  ObtainCurrentStack(thr, StackTrace::GetNextInstructionPc(pc), &stack);
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ScopedReport rep(ReportTypeErrnoInSignal);
+  if (!IsFiredSuppression(ctx, ReportTypeErrnoInSignal, stack)) {
+    rep.AddStack(stack, true);
+    OutputReport(thr, rep);
+  }
+}
+
+static void CallUserSignalHandler(ThreadState *thr, bool sync, bool acquire,
+                                  int sig, __sanitizer_siginfo *info,
+                                  void *uctx) {
+  __sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions;
+  if (acquire)
+    Acquire(thr, 0, (uptr)&sigactions[sig]);
+  // Signals are generally asynchronous, so if we receive a signals when
+  // ignores are enabled we should disable ignores. This is critical for sync
+  // and interceptors, because otherwise we can miss synchronization and report
+  // false races.
+  int ignore_reads_and_writes = thr->ignore_reads_and_writes;
+  int ignore_interceptors = thr->ignore_interceptors;
+  int ignore_sync = thr->ignore_sync;
+  // For symbolizer we only process SIGSEGVs synchronously
+  // (bug in symbolizer or in tsan). But we want to reset
+  // in_symbolizer to fail gracefully. Symbolizer and user code
+  // use 
diff erent memory allocators, so if we don't reset
+  // in_symbolizer we can get memory allocated with one being
+  // feed with another, which can cause more crashes.
+  int in_symbolizer = thr->in_symbolizer;
+  if (!ctx->after_multithreaded_fork) {
+    thr->ignore_reads_and_writes = 0;
+    thr->fast_state.ClearIgnoreBit();
+    thr->ignore_interceptors = 0;
+    thr->ignore_sync = 0;
+    thr->in_symbolizer = 0;
+  }
+  // Ensure that the handler does not spoil errno.
+  const int saved_errno = errno;
+  errno = 99;
+  // This code races with sigaction. Be careful to not read sa_sigaction twice.
+  // Also need to remember pc for reporting before the call,
+  // because the handler can reset it.
+  volatile uptr pc = (sigactions[sig].sa_flags & SA_SIGINFO)
+                         ? (uptr)sigactions[sig].sigaction
+                         : (uptr)sigactions[sig].handler;
+  if (pc != sig_dfl && pc != sig_ign) {
+    // The callback can be either sa_handler or sa_sigaction.
+    // They have 
diff erent signatures, but we assume that passing
+    // additional arguments to sa_handler works and is harmless.
+    ((__sanitizer_sigactionhandler_ptr)pc)(sig, info, uctx);
+  }
+  if (!ctx->after_multithreaded_fork) {
+    thr->ignore_reads_and_writes = ignore_reads_and_writes;
+    if (ignore_reads_and_writes)
+      thr->fast_state.SetIgnoreBit();
+    thr->ignore_interceptors = ignore_interceptors;
+    thr->ignore_sync = ignore_sync;
+    thr->in_symbolizer = in_symbolizer;
+  }
+  // We do not detect errno spoiling for SIGTERM,
+  // because some SIGTERM handlers do spoil errno but reraise SIGTERM,
+  // tsan reports false positive in such case.
+  // It's 
diff icult to properly detect this situation (reraise),
+  // because in async signal processing case (when handler is called directly
+  // from rtl_generic_sighandler) we have not yet received the reraised
+  // signal; and it looks too fragile to intercept all ways to reraise a signal.
+  if (ShouldReport(thr, ReportTypeErrnoInSignal) && !sync && sig != SIGTERM &&
+      errno != 99)
+    ReportErrnoSpoiling(thr, pc);
+  errno = saved_errno;
+}
+
+void ProcessPendingSignalsImpl(ThreadState *thr) {
+  atomic_store(&thr->pending_signals, 0, memory_order_relaxed);
+  ThreadSignalContext *sctx = SigCtx(thr);
+  if (sctx == 0)
+    return;
+  atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
+  internal_sigfillset(&sctx->emptyset);
+  int res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->emptyset, &sctx->oldset);
+  CHECK_EQ(res, 0);
+  for (int sig = 0; sig < kSigCount; sig++) {
+    SignalDesc *signal = &sctx->pending_signals[sig];
+    if (signal->armed) {
+      signal->armed = false;
+      CallUserSignalHandler(thr, false, true, sig, &signal->siginfo,
+                            &signal->ctx);
+    }
+  }
+  res = REAL(pthread_sigmask)(SIG_SETMASK, &sctx->oldset, 0);
+  CHECK_EQ(res, 0);
+  atomic_fetch_add(&thr->in_signal_handler, -1, memory_order_relaxed);
+}
+
+}  // namespace __tsan
+
+static bool is_sync_signal(ThreadSignalContext *sctx, int sig) {
+  return sig == SIGSEGV || sig == SIGBUS || sig == SIGILL || sig == SIGTRAP ||
+         sig == SIGABRT || sig == SIGFPE || sig == SIGPIPE || sig == SIGSYS ||
+         // If we are sending signal to ourselves, we must process it now.
+         (sctx && sig == sctx->int_signal_send);
+}
+
+void sighandler(int sig, __sanitizer_siginfo *info, void *ctx) {
+  ThreadState *thr = cur_thread_init();
+  ThreadSignalContext *sctx = SigCtx(thr);
+  if (sig < 0 || sig >= kSigCount) {
+    VPrintf(1, "ThreadSanitizer: ignoring signal %d\n", sig);
+    return;
+  }
+  // Don't mess with synchronous signals.
+  const bool sync = is_sync_signal(sctx, sig);
+  if (sync ||
+      // If we are in blocking function, we can safely process it now
+      // (but check if we are in a recursive interceptor,
+      // i.e. pthread_join()->munmap()).
+      (sctx && atomic_load(&sctx->in_blocking_func, memory_order_relaxed))) {
+    atomic_fetch_add(&thr->in_signal_handler, 1, memory_order_relaxed);
+    if (sctx && atomic_load(&sctx->in_blocking_func, memory_order_relaxed)) {
+      atomic_store(&sctx->in_blocking_func, 0, memory_order_relaxed);
+      CallUserSignalHandler(thr, sync, true, sig, info, ctx);
+      atomic_store(&sctx->in_blocking_func, 1, memory_order_relaxed);
+    } else {
+      // Be very conservative with when we do acquire in this case.
+      // It's unsafe to do acquire in async handlers, because ThreadState
+      // can be in inconsistent state.
+      // SIGSYS looks relatively safe -- it's synchronous and can actually
+      // need some global state.
+      bool acq = (sig == SIGSYS);
+      CallUserSignalHandler(thr, sync, acq, sig, info, ctx);
+    }
+    atomic_fetch_add(&thr->in_signal_handler, -1, memory_order_relaxed);
+    return;
+  }
+
+  if (sctx == 0)
+    return;
+  SignalDesc *signal = &sctx->pending_signals[sig];
+  if (signal->armed == false) {
+    signal->armed = true;
+    internal_memcpy(&signal->siginfo, info, sizeof(*info));
+    internal_memcpy(&signal->ctx, ctx, sizeof(signal->ctx));
+    atomic_store(&thr->pending_signals, 1, memory_order_relaxed);
+  }
+}
+
+TSAN_INTERCEPTOR(int, raise, int sig) {
+  SCOPED_TSAN_INTERCEPTOR(raise, sig);
+  ThreadSignalContext *sctx = SigCtx(thr);
+  CHECK_NE(sctx, 0);
+  int prev = sctx->int_signal_send;
+  sctx->int_signal_send = sig;
+  int res = REAL(raise)(sig);
+  CHECK_EQ(sctx->int_signal_send, sig);
+  sctx->int_signal_send = prev;
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, kill, int pid, int sig) {
+  SCOPED_TSAN_INTERCEPTOR(kill, pid, sig);
+  ThreadSignalContext *sctx = SigCtx(thr);
+  CHECK_NE(sctx, 0);
+  int prev = sctx->int_signal_send;
+  if (pid == (int)internal_getpid()) {
+    sctx->int_signal_send = sig;
+  }
+  int res = REAL(kill)(pid, sig);
+  if (pid == (int)internal_getpid()) {
+    CHECK_EQ(sctx->int_signal_send, sig);
+    sctx->int_signal_send = prev;
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, pthread_kill, void *tid, int sig) {
+  SCOPED_TSAN_INTERCEPTOR(pthread_kill, tid, sig);
+  ThreadSignalContext *sctx = SigCtx(thr);
+  CHECK_NE(sctx, 0);
+  int prev = sctx->int_signal_send;
+  bool self = pthread_equal(tid, pthread_self());
+  if (self)
+    sctx->int_signal_send = sig;
+  int res = REAL(pthread_kill)(tid, sig);
+  if (self) {
+    CHECK_EQ(sctx->int_signal_send, sig);
+    sctx->int_signal_send = prev;
+  }
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, gettimeofday, void *tv, void *tz) {
+  SCOPED_TSAN_INTERCEPTOR(gettimeofday, tv, tz);
+  // It's intercepted merely to process pending signals.
+  return REAL(gettimeofday)(tv, tz);
+}
+
+TSAN_INTERCEPTOR(int, getaddrinfo, void *node, void *service,
+    void *hints, void *rv) {
+  SCOPED_TSAN_INTERCEPTOR(getaddrinfo, node, service, hints, rv);
+  // We miss atomic synchronization in getaddrinfo,
+  // and can report false race between malloc and free
+  // inside of getaddrinfo. So ignore memory accesses.
+  ThreadIgnoreBegin(thr, pc);
+  int res = REAL(getaddrinfo)(node, service, hints, rv);
+  ThreadIgnoreEnd(thr);
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, fork, int fake) {
+  if (in_symbolizer())
+    return REAL(fork)(fake);
+  SCOPED_INTERCEPTOR_RAW(fork, fake);
+  return REAL(fork)(fake);
+}
+
+void atfork_prepare() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
+  ForkBefore(thr, pc);
+}
+
+void atfork_parent() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
+  ForkParentAfter(thr, pc);
+}
+
+void atfork_child() {
+  if (in_symbolizer())
+    return;
+  ThreadState *thr = cur_thread();
+  const uptr pc = StackTrace::GetCurrentPc();
+  ForkChildAfter(thr, pc, true);
+  FdOnFork(thr, pc);
+}
+
+TSAN_INTERCEPTOR(int, vfork, int fake) {
+  // Some programs (e.g. openjdk) call close for all file descriptors
+  // in the child process. Under tsan it leads to false positives, because
+  // address space is shared, so the parent process also thinks that
+  // the descriptors are closed (while they are actually not).
+  // This leads to false positives due to missed synchronization.
+  // Strictly saying this is undefined behavior, because vfork child is not
+  // allowed to call any functions other than exec/exit. But this is what
+  // openjdk does, so we want to handle it.
+  // We could disable interceptors in the child process. But it's not possible
+  // to simply intercept and wrap vfork, because vfork child is not allowed
+  // to return from the function that calls vfork, and that's exactly what
+  // we would do. So this would require some assembly trickery as well.
+  // Instead we simply turn vfork into fork.
+  return WRAP(fork)(fake);
+}
+
+#if SANITIZER_LINUX
+TSAN_INTERCEPTOR(int, clone, int (*fn)(void *), void *stack, int flags,
+                 void *arg, int *parent_tid, void *tls, pid_t *child_tid) {
+  SCOPED_INTERCEPTOR_RAW(clone, fn, stack, flags, arg, parent_tid, tls,
+                         child_tid);
+  struct Arg {
+    int (*fn)(void *);
+    void *arg;
+  };
+  auto wrapper = +[](void *p) -> int {
+    auto *thr = cur_thread();
+    uptr pc = GET_CURRENT_PC();
+    // Start the background thread for fork, but not for clone.
+    // For fork we did this always and it's known to work (or user code has
+    // adopted). But if we do this for the new clone interceptor some code
+    // (sandbox2) fails. So model we used to do for years and don't start the
+    // background thread after clone.
+    ForkChildAfter(thr, pc, false);
+    FdOnFork(thr, pc);
+    auto *arg = static_cast<Arg *>(p);
+    return arg->fn(arg->arg);
+  };
+  ForkBefore(thr, pc);
+  Arg arg_wrapper = {fn, arg};
+  int pid = REAL(clone)(wrapper, stack, flags, &arg_wrapper, parent_tid, tls,
+                        child_tid);
+  ForkParentAfter(thr, pc);
+  return pid;
+}
+#endif
+
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+typedef int (*dl_iterate_phdr_cb_t)(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                                    void *data);
+struct dl_iterate_phdr_data {
+  ThreadState *thr;
+  uptr pc;
+  dl_iterate_phdr_cb_t cb;
+  void *data;
+};
+
+static bool IsAppNotRodata(uptr addr) {
+  return IsAppMem(addr) && *MemToShadow(addr) != kShadowRodata;
+}
+
+static int dl_iterate_phdr_cb(__sanitizer_dl_phdr_info *info, SIZE_T size,
+                              void *data) {
+  dl_iterate_phdr_data *cbdata = (dl_iterate_phdr_data *)data;
+  // dlopen/dlclose allocate/free dynamic-linker-internal memory, which is later
+  // accessible in dl_iterate_phdr callback. But we don't see synchronization
+  // inside of dynamic linker, so we "unpoison" it here in order to not
+  // produce false reports. Ignoring malloc/free in dlopen/dlclose is not enough
+  // because some libc functions call __libc_dlopen.
+  if (info && IsAppNotRodata((uptr)info->dlpi_name))
+    MemoryResetRange(cbdata->thr, cbdata->pc, (uptr)info->dlpi_name,
+                     internal_strlen(info->dlpi_name));
+  int res = cbdata->cb(info, size, cbdata->data);
+  // Perform the check one more time in case info->dlpi_name was overwritten
+  // by user callback.
+  if (info && IsAppNotRodata((uptr)info->dlpi_name))
+    MemoryResetRange(cbdata->thr, cbdata->pc, (uptr)info->dlpi_name,
+                     internal_strlen(info->dlpi_name));
+  return res;
+}
+
+TSAN_INTERCEPTOR(int, dl_iterate_phdr, dl_iterate_phdr_cb_t cb, void *data) {
+  SCOPED_TSAN_INTERCEPTOR(dl_iterate_phdr, cb, data);
+  dl_iterate_phdr_data cbdata;
+  cbdata.thr = thr;
+  cbdata.pc = pc;
+  cbdata.cb = cb;
+  cbdata.data = data;
+  int res = REAL(dl_iterate_phdr)(dl_iterate_phdr_cb, &cbdata);
+  return res;
+}
+#endif
+
+static int OnExit(ThreadState *thr) {
+  int status = Finalize(thr);
+  FlushStreams();
+  return status;
+}
+
+struct TsanInterceptorContext {
+  ThreadState *thr;
+  const uptr pc;
+};
+
+#if !SANITIZER_MAC
+static void HandleRecvmsg(ThreadState *thr, uptr pc,
+    __sanitizer_msghdr *msg) {
+  int fds[64];
+  int cnt = ExtractRecvmsgFDs(msg, fds, ARRAY_SIZE(fds));
+  for (int i = 0; i < cnt; i++)
+    FdEventCreate(thr, pc, fds[i]);
+}
+#endif
+
+#include "sanitizer_common/sanitizer_platform_interceptors.h"
+// Causes interceptor recursion (getaddrinfo() and fopen())
+#undef SANITIZER_INTERCEPT_GETADDRINFO
+// We define our own.
+#if SANITIZER_INTERCEPT_TLS_GET_ADDR
+#define NEED_TLS_GET_ADDR
+#endif
+#undef SANITIZER_INTERCEPT_TLS_GET_ADDR
+#define SANITIZER_INTERCEPT_TLS_GET_OFFSET 1
+#undef SANITIZER_INTERCEPT_PTHREAD_SIGMASK
+
+#define COMMON_INTERCEPT_FUNCTION(name) INTERCEPT_FUNCTION(name)
+#define COMMON_INTERCEPT_FUNCTION_VER(name, ver)                          \
+  INTERCEPT_FUNCTION_VER(name, ver)
+#define COMMON_INTERCEPT_FUNCTION_VER_UNVERSIONED_FALLBACK(name, ver) \
+  (INTERCEPT_FUNCTION_VER(name, ver) || INTERCEPT_FUNCTION(name))
+
+#define COMMON_INTERCEPTOR_WRITE_RANGE(ctx, ptr, size)                    \
+  MemoryAccessRange(((TsanInterceptorContext *)ctx)->thr,                 \
+                    ((TsanInterceptorContext *)ctx)->pc, (uptr)ptr, size, \
+                    true)
+
+#define COMMON_INTERCEPTOR_READ_RANGE(ctx, ptr, size)                       \
+  MemoryAccessRange(((TsanInterceptorContext *) ctx)->thr,                  \
+                    ((TsanInterceptorContext *) ctx)->pc, (uptr) ptr, size, \
+                    false)
+
+#define COMMON_INTERCEPTOR_ENTER(ctx, func, ...) \
+  SCOPED_TSAN_INTERCEPTOR(func, __VA_ARGS__);    \
+  TsanInterceptorContext _ctx = {thr, pc};       \
+  ctx = (void *)&_ctx;                           \
+  (void)ctx;
+
+#define COMMON_INTERCEPTOR_ENTER_NOIGNORE(ctx, func, ...) \
+  SCOPED_INTERCEPTOR_RAW(func, __VA_ARGS__);              \
+  TsanInterceptorContext _ctx = {thr, pc};                \
+  ctx = (void *)&_ctx;                                    \
+  (void)ctx;
+
+#define COMMON_INTERCEPTOR_FILE_OPEN(ctx, file, path) \
+  if (path)                                           \
+    Acquire(thr, pc, File2addr(path));                \
+  if (file) {                                         \
+    int fd = fileno_unlocked(file);                   \
+    if (fd >= 0) FdFileCreate(thr, pc, fd);           \
+  }
+
+#define COMMON_INTERCEPTOR_FILE_CLOSE(ctx, file) \
+  if (file) {                                    \
+    int fd = fileno_unlocked(file);              \
+    if (fd >= 0) FdClose(thr, pc, fd);           \
+  }
+
+#define COMMON_INTERCEPTOR_DLOPEN(filename, flag) \
+  ({                                              \
+    CheckNoDeepBind(filename, flag);              \
+    ThreadIgnoreBegin(thr, 0);                    \
+    void *res = REAL(dlopen)(filename, flag);     \
+    ThreadIgnoreEnd(thr);                         \
+    res;                                          \
+  })
+
+#define COMMON_INTERCEPTOR_LIBRARY_LOADED(filename, handle) \
+  libignore()->OnLibraryLoaded(filename)
+
+#define COMMON_INTERCEPTOR_LIBRARY_UNLOADED() \
+  libignore()->OnLibraryUnloaded()
+
+#define COMMON_INTERCEPTOR_ACQUIRE(ctx, u) \
+  Acquire(((TsanInterceptorContext *) ctx)->thr, pc, u)
+
+#define COMMON_INTERCEPTOR_RELEASE(ctx, u) \
+  Release(((TsanInterceptorContext *) ctx)->thr, pc, u)
+
+#define COMMON_INTERCEPTOR_DIR_ACQUIRE(ctx, path) \
+  Acquire(((TsanInterceptorContext *) ctx)->thr, pc, Dir2addr(path))
+
+#define COMMON_INTERCEPTOR_FD_ACQUIRE(ctx, fd) \
+  FdAcquire(((TsanInterceptorContext *) ctx)->thr, pc, fd)
+
+#define COMMON_INTERCEPTOR_FD_RELEASE(ctx, fd) \
+  FdRelease(((TsanInterceptorContext *) ctx)->thr, pc, fd)
+
+#define COMMON_INTERCEPTOR_FD_ACCESS(ctx, fd) \
+  FdAccess(((TsanInterceptorContext *) ctx)->thr, pc, fd)
+
+#define COMMON_INTERCEPTOR_FD_SOCKET_ACCEPT(ctx, fd, newfd) \
+  FdSocketAccept(((TsanInterceptorContext *) ctx)->thr, pc, fd, newfd)
+
+#define COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name) \
+  ThreadSetName(((TsanInterceptorContext *) ctx)->thr, name)
+
+#define COMMON_INTERCEPTOR_SET_PTHREAD_NAME(ctx, thread, name)         \
+  if (pthread_equal(pthread_self(), reinterpret_cast<void *>(thread))) \
+    COMMON_INTERCEPTOR_SET_THREAD_NAME(ctx, name);                     \
+  else                                                                 \
+    __tsan::ctx->thread_registry.SetThreadNameByUserId(thread, name)
+
+#define COMMON_INTERCEPTOR_BLOCK_REAL(name) BLOCK_REAL(name)
+
+#define COMMON_INTERCEPTOR_ON_EXIT(ctx) \
+  OnExit(((TsanInterceptorContext *) ctx)->thr)
+
+#define COMMON_INTERCEPTOR_MUTEX_PRE_LOCK(ctx, m) \
+  MutexPreLock(((TsanInterceptorContext *)ctx)->thr, \
+            ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MUTEX_POST_LOCK(ctx, m) \
+  MutexPostLock(((TsanInterceptorContext *)ctx)->thr, \
+            ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MUTEX_UNLOCK(ctx, m) \
+  MutexUnlock(((TsanInterceptorContext *)ctx)->thr, \
+            ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MUTEX_REPAIR(ctx, m) \
+  MutexRepair(((TsanInterceptorContext *)ctx)->thr, \
+            ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MUTEX_INVALID(ctx, m) \
+  MutexInvalidAccess(((TsanInterceptorContext *)ctx)->thr, \
+                     ((TsanInterceptorContext *)ctx)->pc, (uptr)m)
+
+#define COMMON_INTERCEPTOR_MMAP_IMPL(ctx, mmap, addr, sz, prot, flags, fd,  \
+                                     off)                                   \
+  do {                                                                      \
+    return mmap_interceptor(thr, pc, REAL(mmap), addr, sz, prot, flags, fd, \
+                            off);                                           \
+  } while (false)
+
+#if !SANITIZER_MAC
+#define COMMON_INTERCEPTOR_HANDLE_RECVMSG(ctx, msg) \
+  HandleRecvmsg(((TsanInterceptorContext *)ctx)->thr, \
+      ((TsanInterceptorContext *)ctx)->pc, msg)
+#endif
+
+#define COMMON_INTERCEPTOR_GET_TLS_RANGE(begin, end)                           \
+  if (TsanThread *t = GetCurrentThread()) {                                    \
+    *begin = t->tls_begin();                                                   \
+    *end = t->tls_end();                                                       \
+  } else {                                                                     \
+    *begin = *end = 0;                                                         \
+  }
+
+#define COMMON_INTERCEPTOR_USER_CALLBACK_START() \
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_START()
+
+#define COMMON_INTERCEPTOR_USER_CALLBACK_END() \
+  SCOPED_TSAN_INTERCEPTOR_USER_CALLBACK_END()
+
+#include "sanitizer_common/sanitizer_common_interceptors.inc"
+
+static int sigaction_impl(int sig, const __sanitizer_sigaction *act,
+                          __sanitizer_sigaction *old);
+static __sanitizer_sighandler_ptr signal_impl(int sig,
+                                              __sanitizer_sighandler_ptr h);
+
+#define SIGNAL_INTERCEPTOR_SIGACTION_IMPL(signo, act, oldact) \
+  { return sigaction_impl(signo, act, oldact); }
+
+#define SIGNAL_INTERCEPTOR_SIGNAL_IMPL(func, signo, handler) \
+  { return (uptr)signal_impl(signo, (__sanitizer_sighandler_ptr)handler); }
+
+#include "sanitizer_common/sanitizer_signal_interceptors.inc"
+
+int sigaction_impl(int sig, const __sanitizer_sigaction *act,
+                   __sanitizer_sigaction *old) {
+  // Note: if we call REAL(sigaction) directly for any reason without proxying
+  // the signal handler through sighandler, very bad things will happen.
+  // The handler will run synchronously and corrupt tsan per-thread state.
+  SCOPED_INTERCEPTOR_RAW(sigaction, sig, act, old);
+  if (sig <= 0 || sig >= kSigCount) {
+    errno = errno_EINVAL;
+    return -1;
+  }
+  __sanitizer_sigaction *sigactions = interceptor_ctx()->sigactions;
+  __sanitizer_sigaction old_stored;
+  if (old) internal_memcpy(&old_stored, &sigactions[sig], sizeof(old_stored));
+  __sanitizer_sigaction newact;
+  if (act) {
+    // Copy act into sigactions[sig].
+    // Can't use struct copy, because compiler can emit call to memcpy.
+    // Can't use internal_memcpy, because it copies byte-by-byte,
+    // and signal handler reads the handler concurrently. It it can read
+    // some bytes from old value and some bytes from new value.
+    // Use volatile to prevent insertion of memcpy.
+    sigactions[sig].handler =
+        *(volatile __sanitizer_sighandler_ptr const *)&act->handler;
+    sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags;
+    internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask,
+                    sizeof(sigactions[sig].sa_mask));
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD
+    sigactions[sig].sa_restorer = act->sa_restorer;
+#endif
+    internal_memcpy(&newact, act, sizeof(newact));
+    internal_sigfillset(&newact.sa_mask);
+    if ((act->sa_flags & SA_SIGINFO) ||
+        ((uptr)act->handler != sig_ign && (uptr)act->handler != sig_dfl)) {
+      newact.sa_flags |= SA_SIGINFO;
+      newact.sigaction = sighandler;
+    }
+    ReleaseStore(thr, pc, (uptr)&sigactions[sig]);
+    act = &newact;
+  }
+  int res = REAL(sigaction)(sig, act, old);
+  if (res == 0 && old && old->sigaction == sighandler)
+    internal_memcpy(old, &old_stored, sizeof(*old));
+  return res;
+}
+
+static __sanitizer_sighandler_ptr signal_impl(int sig,
+                                              __sanitizer_sighandler_ptr h) {
+  __sanitizer_sigaction act;
+  act.handler = h;
+  internal_memset(&act.sa_mask, -1, sizeof(act.sa_mask));
+  act.sa_flags = 0;
+  __sanitizer_sigaction old;
+  int res = sigaction_symname(sig, &act, &old);
+  if (res) return (__sanitizer_sighandler_ptr)sig_err;
+  return old.handler;
+}
+
+#define TSAN_SYSCALL()             \
+  ThreadState *thr = cur_thread(); \
+  if (thr->ignore_interceptors)    \
+    return;                        \
+  ScopedSyscall scoped_syscall(thr)
+
+struct ScopedSyscall {
+  ThreadState *thr;
+
+  explicit ScopedSyscall(ThreadState *thr) : thr(thr) { LazyInitialize(thr); }
+
+  ~ScopedSyscall() {
+    ProcessPendingSignals(thr);
+  }
+};
+
+#if !SANITIZER_FREEBSD && !SANITIZER_MAC
+static void syscall_access_range(uptr pc, uptr p, uptr s, bool write) {
+  TSAN_SYSCALL();
+  MemoryAccessRange(thr, pc, p, s, write);
+}
+
+static USED void syscall_acquire(uptr pc, uptr addr) {
+  TSAN_SYSCALL();
+  Acquire(thr, pc, addr);
+  DPrintf("syscall_acquire(0x%zx))\n", addr);
+}
+
+static USED void syscall_release(uptr pc, uptr addr) {
+  TSAN_SYSCALL();
+  DPrintf("syscall_release(0x%zx)\n", addr);
+  Release(thr, pc, addr);
+}
+
+static void syscall_fd_close(uptr pc, int fd) {
+  TSAN_SYSCALL();
+  FdClose(thr, pc, fd);
+}
+
+static USED void syscall_fd_acquire(uptr pc, int fd) {
+  TSAN_SYSCALL();
+  FdAcquire(thr, pc, fd);
+  DPrintf("syscall_fd_acquire(%d)\n", fd);
+}
+
+static USED void syscall_fd_release(uptr pc, int fd) {
+  TSAN_SYSCALL();
+  DPrintf("syscall_fd_release(%d)\n", fd);
+  FdRelease(thr, pc, fd);
+}
+
+static void syscall_pre_fork(uptr pc) { ForkBefore(cur_thread(), pc); }
+
+static void syscall_post_fork(uptr pc, int pid) {
+  ThreadState *thr = cur_thread();
+  if (pid == 0) {
+    // child
+    ForkChildAfter(thr, pc, true);
+    FdOnFork(thr, pc);
+  } else if (pid > 0) {
+    // parent
+    ForkParentAfter(thr, pc);
+  } else {
+    // error
+    ForkParentAfter(thr, pc);
+  }
+}
+#endif
+
+#define COMMON_SYSCALL_PRE_READ_RANGE(p, s) \
+  syscall_access_range(GET_CALLER_PC(), (uptr)(p), (uptr)(s), false)
+
+#define COMMON_SYSCALL_PRE_WRITE_RANGE(p, s) \
+  syscall_access_range(GET_CALLER_PC(), (uptr)(p), (uptr)(s), true)
+
+#define COMMON_SYSCALL_POST_READ_RANGE(p, s) \
+  do {                                       \
+    (void)(p);                               \
+    (void)(s);                               \
+  } while (false)
+
+#define COMMON_SYSCALL_POST_WRITE_RANGE(p, s) \
+  do {                                        \
+    (void)(p);                                \
+    (void)(s);                                \
+  } while (false)
+
+#define COMMON_SYSCALL_ACQUIRE(addr) \
+    syscall_acquire(GET_CALLER_PC(), (uptr)(addr))
+
+#define COMMON_SYSCALL_RELEASE(addr) \
+    syscall_release(GET_CALLER_PC(), (uptr)(addr))
+
+#define COMMON_SYSCALL_FD_CLOSE(fd) syscall_fd_close(GET_CALLER_PC(), fd)
+
+#define COMMON_SYSCALL_FD_ACQUIRE(fd) syscall_fd_acquire(GET_CALLER_PC(), fd)
+
+#define COMMON_SYSCALL_FD_RELEASE(fd) syscall_fd_release(GET_CALLER_PC(), fd)
+
+#define COMMON_SYSCALL_PRE_FORK() \
+  syscall_pre_fork(GET_CALLER_PC())
+
+#define COMMON_SYSCALL_POST_FORK(res) \
+  syscall_post_fork(GET_CALLER_PC(), res)
+
+#include "sanitizer_common/sanitizer_common_syscalls.inc"
+#include "sanitizer_common/sanitizer_syscalls_netbsd.inc"
+
+#ifdef NEED_TLS_GET_ADDR
+
+static void handle_tls_addr(void *arg, void *res) {
+  ThreadState *thr = cur_thread();
+  if (!thr)
+    return;
+  DTLS::DTV *dtv = DTLS_on_tls_get_addr(arg, res, thr->tls_addr,
+                                        thr->tls_addr + thr->tls_size);
+  if (!dtv)
+    return;
+  // New DTLS block has been allocated.
+  MemoryResetRange(thr, 0, dtv->beg, dtv->size);
+}
+
+#if !SANITIZER_S390
+// Define own interceptor instead of sanitizer_common's for three reasons:
+// 1. It must not process pending signals.
+//    Signal handlers may contain MOVDQA instruction (see below).
+// 2. It must be as simple as possible to not contain MOVDQA.
+// 3. Sanitizer_common version uses COMMON_INTERCEPTOR_INITIALIZE_RANGE which
+//    is empty for tsan (meant only for msan).
+// Note: __tls_get_addr can be called with mis-aligned stack due to:
+// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066
+// So the interceptor must work with mis-aligned stack, in particular, does not
+// execute MOVDQA with stack addresses.
+TSAN_INTERCEPTOR(void *, __tls_get_addr, void *arg) {
+  void *res = REAL(__tls_get_addr)(arg);
+  handle_tls_addr(arg, res);
+  return res;
+}
+#else // SANITIZER_S390
+TSAN_INTERCEPTOR(uptr, __tls_get_addr_internal, void *arg) {
+  uptr res = __tls_get_offset_wrapper(arg, REAL(__tls_get_offset));
+  char *tp = static_cast<char *>(__builtin_thread_pointer());
+  handle_tls_addr(arg, res + tp);
+  return res;
+}
+#endif
+#endif
+
+#if SANITIZER_NETBSD
+TSAN_INTERCEPTOR(void, _lwp_exit) {
+  SCOPED_TSAN_INTERCEPTOR(_lwp_exit);
+  DestroyThreadState();
+  REAL(_lwp_exit)();
+}
+#define TSAN_MAYBE_INTERCEPT__LWP_EXIT TSAN_INTERCEPT(_lwp_exit)
+#else
+#define TSAN_MAYBE_INTERCEPT__LWP_EXIT
+#endif
+
+#if SANITIZER_FREEBSD
+TSAN_INTERCEPTOR(void, thr_exit, tid_t *state) {
+  SCOPED_TSAN_INTERCEPTOR(thr_exit, state);
+  DestroyThreadState();
+  REAL(thr_exit(state));
+}
+#define TSAN_MAYBE_INTERCEPT_THR_EXIT TSAN_INTERCEPT(thr_exit)
+#else
+#define TSAN_MAYBE_INTERCEPT_THR_EXIT
+#endif
+
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_init, void *c, void *a)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_signal, void *c)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_broadcast, void *c)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_wait, void *c, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, cond_destroy, void *c)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, mutex_init, void *m, void *a)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, mutex_destroy, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, mutex_trylock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_init, void *m, void *a)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_destroy, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_rdlock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_tryrdlock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_wrlock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_trywrlock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS(int, rwlock_unlock, void *m)
+TSAN_INTERCEPTOR_NETBSD_ALIAS_THR(int, once, void *o, void (*f)())
+TSAN_INTERCEPTOR_NETBSD_ALIAS_THR2(int, sigsetmask, sigmask, int a, void *b,
+  void *c)
+
+namespace __tsan {
+
+static void finalize(void *arg) {
+  ThreadState *thr = cur_thread();
+  int status = Finalize(thr);
+  // Make sure the output is not lost.
+  FlushStreams();
+  if (status)
+    Die();
+}
+
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+static void unreachable() {
+  Report("FATAL: ThreadSanitizer: unreachable called\n");
+  Die();
+}
+#endif
+
+// Define default implementation since interception of libdispatch  is optional.
+SANITIZER_WEAK_ATTRIBUTE void InitializeLibdispatchInterceptors() {}
+
+void InitializeInterceptors() {
+#if !SANITIZER_MAC
+  // We need to setup it early, because functions like dlsym() can call it.
+  REAL(memset) = internal_memset;
+  REAL(memcpy) = internal_memcpy;
+#endif
+
+  new(interceptor_ctx()) InterceptorContext();
+
+  InitializeCommonInterceptors();
+  InitializeSignalInterceptors();
+  InitializeLibdispatchInterceptors();
+
+#if !SANITIZER_MAC
+  // We can not use TSAN_INTERCEPT to get setjmp addr,
+  // because it does &setjmp and setjmp is not present in some versions of libc.
+  using __interception::InterceptFunction;
+  InterceptFunction(TSAN_STRING_SETJMP, (uptr*)&REAL(setjmp_symname), 0, 0);
+  InterceptFunction("_setjmp", (uptr*)&REAL(_setjmp), 0, 0);
+  InterceptFunction(TSAN_STRING_SIGSETJMP, (uptr*)&REAL(sigsetjmp_symname), 0,
+                    0);
+#if !SANITIZER_NETBSD
+  InterceptFunction("__sigsetjmp", (uptr*)&REAL(__sigsetjmp), 0, 0);
+#endif
+#endif
+
+  TSAN_INTERCEPT(longjmp_symname);
+  TSAN_INTERCEPT(siglongjmp_symname);
+#if SANITIZER_NETBSD
+  TSAN_INTERCEPT(_longjmp);
+#endif
+
+  TSAN_INTERCEPT(malloc);
+  TSAN_INTERCEPT(__libc_memalign);
+  TSAN_INTERCEPT(calloc);
+  TSAN_INTERCEPT(realloc);
+  TSAN_INTERCEPT(reallocarray);
+  TSAN_INTERCEPT(free);
+  TSAN_INTERCEPT(cfree);
+  TSAN_INTERCEPT(munmap);
+  TSAN_MAYBE_INTERCEPT_MEMALIGN;
+  TSAN_INTERCEPT(valloc);
+  TSAN_MAYBE_INTERCEPT_PVALLOC;
+  TSAN_INTERCEPT(posix_memalign);
+
+  TSAN_INTERCEPT(strcpy);
+  TSAN_INTERCEPT(strncpy);
+  TSAN_INTERCEPT(strdup);
+
+  TSAN_INTERCEPT(pthread_create);
+  TSAN_INTERCEPT(pthread_join);
+  TSAN_INTERCEPT(pthread_detach);
+  TSAN_INTERCEPT(pthread_exit);
+  #if SANITIZER_LINUX
+  TSAN_INTERCEPT(pthread_tryjoin_np);
+  TSAN_INTERCEPT(pthread_timedjoin_np);
+  #endif
+
+  TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_wait, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_timedwait, PTHREAD_ABI_BASE);
+  TSAN_INTERCEPT_VER(pthread_cond_destroy, PTHREAD_ABI_BASE);
+
+  TSAN_MAYBE_PTHREAD_COND_CLOCKWAIT;
+
+  TSAN_INTERCEPT(pthread_mutex_init);
+  TSAN_INTERCEPT(pthread_mutex_destroy);
+  TSAN_INTERCEPT(pthread_mutex_trylock);
+  TSAN_INTERCEPT(pthread_mutex_timedlock);
+
+  TSAN_INTERCEPT(pthread_spin_init);
+  TSAN_INTERCEPT(pthread_spin_destroy);
+  TSAN_INTERCEPT(pthread_spin_lock);
+  TSAN_INTERCEPT(pthread_spin_trylock);
+  TSAN_INTERCEPT(pthread_spin_unlock);
+
+  TSAN_INTERCEPT(pthread_rwlock_init);
+  TSAN_INTERCEPT(pthread_rwlock_destroy);
+  TSAN_INTERCEPT(pthread_rwlock_rdlock);
+  TSAN_INTERCEPT(pthread_rwlock_tryrdlock);
+  TSAN_INTERCEPT(pthread_rwlock_timedrdlock);
+  TSAN_INTERCEPT(pthread_rwlock_wrlock);
+  TSAN_INTERCEPT(pthread_rwlock_trywrlock);
+  TSAN_INTERCEPT(pthread_rwlock_timedwrlock);
+  TSAN_INTERCEPT(pthread_rwlock_unlock);
+
+  TSAN_INTERCEPT(pthread_barrier_init);
+  TSAN_INTERCEPT(pthread_barrier_destroy);
+  TSAN_INTERCEPT(pthread_barrier_wait);
+
+  TSAN_INTERCEPT(pthread_once);
+
+  TSAN_INTERCEPT(fstat);
+  TSAN_MAYBE_INTERCEPT___FXSTAT;
+  TSAN_MAYBE_INTERCEPT_FSTAT64;
+  TSAN_MAYBE_INTERCEPT___FXSTAT64;
+  TSAN_INTERCEPT(open);
+  TSAN_MAYBE_INTERCEPT_OPEN64;
+  TSAN_INTERCEPT(creat);
+  TSAN_MAYBE_INTERCEPT_CREAT64;
+  TSAN_INTERCEPT(dup);
+  TSAN_INTERCEPT(dup2);
+  TSAN_INTERCEPT(dup3);
+  TSAN_MAYBE_INTERCEPT_EVENTFD;
+  TSAN_MAYBE_INTERCEPT_SIGNALFD;
+  TSAN_MAYBE_INTERCEPT_INOTIFY_INIT;
+  TSAN_MAYBE_INTERCEPT_INOTIFY_INIT1;
+  TSAN_INTERCEPT(socket);
+  TSAN_INTERCEPT(socketpair);
+  TSAN_INTERCEPT(connect);
+  TSAN_INTERCEPT(bind);
+  TSAN_INTERCEPT(listen);
+  TSAN_MAYBE_INTERCEPT_EPOLL;
+  TSAN_INTERCEPT(close);
+  TSAN_MAYBE_INTERCEPT___CLOSE;
+  TSAN_MAYBE_INTERCEPT___RES_ICLOSE;
+  TSAN_INTERCEPT(pipe);
+  TSAN_INTERCEPT(pipe2);
+
+  TSAN_INTERCEPT(unlink);
+  TSAN_INTERCEPT(tmpfile);
+  TSAN_MAYBE_INTERCEPT_TMPFILE64;
+  TSAN_INTERCEPT(abort);
+  TSAN_INTERCEPT(rmdir);
+  TSAN_INTERCEPT(closedir);
+
+  TSAN_INTERCEPT(sigsuspend);
+  TSAN_INTERCEPT(sigblock);
+  TSAN_INTERCEPT(sigsetmask);
+  TSAN_INTERCEPT(pthread_sigmask);
+  TSAN_INTERCEPT(raise);
+  TSAN_INTERCEPT(kill);
+  TSAN_INTERCEPT(pthread_kill);
+  TSAN_INTERCEPT(sleep);
+  TSAN_INTERCEPT(usleep);
+  TSAN_INTERCEPT(nanosleep);
+  TSAN_INTERCEPT(pause);
+  TSAN_INTERCEPT(gettimeofday);
+  TSAN_INTERCEPT(getaddrinfo);
+
+  TSAN_INTERCEPT(fork);
+  TSAN_INTERCEPT(vfork);
+#if SANITIZER_LINUX
+  TSAN_INTERCEPT(clone);
+#endif
+#if !SANITIZER_ANDROID
+  TSAN_INTERCEPT(dl_iterate_phdr);
+#endif
+  TSAN_MAYBE_INTERCEPT_ON_EXIT;
+  TSAN_INTERCEPT(__cxa_atexit);
+  TSAN_INTERCEPT(_exit);
+
+#ifdef NEED_TLS_GET_ADDR
+#if !SANITIZER_S390
+  TSAN_INTERCEPT(__tls_get_addr);
+#else
+  TSAN_INTERCEPT(__tls_get_addr_internal);
+  TSAN_INTERCEPT(__tls_get_offset);
+#endif
+#endif
+
+  TSAN_MAYBE_INTERCEPT__LWP_EXIT;
+  TSAN_MAYBE_INTERCEPT_THR_EXIT;
+
+#if !SANITIZER_MAC && !SANITIZER_ANDROID
+  // Need to setup it, because interceptors check that the function is resolved.
+  // But atexit is emitted directly into the module, so can't be resolved.
+  REAL(atexit) = (int(*)(void(*)()))unreachable;
+#endif
+
+  if (REAL(__cxa_atexit)(&finalize, 0, 0)) {
+    Printf("ThreadSanitizer: failed to setup atexit callback\n");
+    Die();
+  }
+  if (pthread_atfork(atfork_prepare, atfork_parent, atfork_child)) {
+    Printf("ThreadSanitizer: failed to setup atfork callbacks\n");
+    Die();
+  }
+
+#if !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_FREEBSD
+  if (pthread_key_create(&interceptor_ctx()->finalize_key, &thread_finalize)) {
+    Printf("ThreadSanitizer: failed to create thread key\n");
+    Die();
+  }
+#endif
+
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_init);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_signal);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_broadcast);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_wait);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(cond_destroy);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(mutex_init);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(mutex_destroy);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(mutex_trylock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_init);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_destroy);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_rdlock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_tryrdlock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_wrlock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_trywrlock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS(rwlock_unlock);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(once);
+  TSAN_MAYBE_INTERCEPT_NETBSD_ALIAS_THR(sigsetmask);
+
+  FdInit();
+}
+
+}  // namespace __tsan
+
+// Invisible barrier for tests.
+// There were several unsuccessful iterations for this functionality:
+// 1. Initially it was implemented in user code using
+//    REAL(pthread_barrier_wait). But pthread_barrier_wait is not supported on
+//    MacOS. Futexes are linux-specific for this matter.
+// 2. Then we switched to atomics+usleep(10). But usleep produced parasitic
+//    "as-if synchronized via sleep" messages in reports which failed some
+//    output tests.
+// 3. Then we switched to atomics+sched_yield. But this produced tons of tsan-
+//    visible events, which lead to "failed to restore stack trace" failures.
+// Note that no_sanitize_thread attribute does not turn off atomic interception
+// so attaching it to the function defined in user code does not help.
+// That's why we now have what we have.
+constexpr u32 kBarrierThreadBits = 10;
+constexpr u32 kBarrierThreads = 1 << kBarrierThreadBits;
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_init(
+    atomic_uint32_t *barrier, u32 num_threads) {
+  if (num_threads >= kBarrierThreads) {
+    Printf("barrier_init: count is too large (%d)\n", num_threads);
+    Die();
+  }
+  // kBarrierThreadBits lsb is thread count,
+  // the remaining are count of entered threads.
+  atomic_store(barrier, num_threads, memory_order_relaxed);
+}
+
+static u32 barrier_epoch(u32 value) {
+  return (value >> kBarrierThreadBits) / (value & (kBarrierThreads - 1));
+}
+
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE void __tsan_testonly_barrier_wait(
+    atomic_uint32_t *barrier) {
+  u32 old = atomic_fetch_add(barrier, kBarrierThreads, memory_order_relaxed);
+  u32 old_epoch = barrier_epoch(old);
+  if (barrier_epoch(old + kBarrierThreads) != old_epoch) {
+    FutexWake(barrier, (1 << 30));
+    return;
+  }
+  for (;;) {
+    u32 cur = atomic_load(barrier, memory_order_relaxed);
+    if (barrier_epoch(cur) != old_epoch)
+      return;
+    FutexWait(barrier, cur);
+  }
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interface.cpp
new file mode 100644
index 0000000000000..048715185151c
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface.cpp
@@ -0,0 +1,106 @@
+//===-- tsan_interface.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+void __tsan_init() { Initialize(cur_thread_init()); }
+
+void __tsan_flush_memory() {
+  FlushShadowMemory();
+}
+
+void __tsan_read16(void *addr) {
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  MemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
+  MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
+}
+
+void __tsan_write16(void *addr) {
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  MemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
+  MemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
+}
+
+void __tsan_read16_pc(void *addr, void *pc) {
+  uptr pc_no_pac = STRIP_PAC_PC(pc);
+  ThreadState *thr = cur_thread();
+  MemoryAccess(thr, pc_no_pac, (uptr)addr, 8, kAccessRead);
+  MemoryAccess(thr, pc_no_pac, (uptr)addr + 8, 8, kAccessRead);
+}
+
+void __tsan_write16_pc(void *addr, void *pc) {
+  uptr pc_no_pac = STRIP_PAC_PC(pc);
+  ThreadState *thr = cur_thread();
+  MemoryAccess(thr, pc_no_pac, (uptr)addr, 8, kAccessWrite);
+  MemoryAccess(thr, pc_no_pac, (uptr)addr + 8, 8, kAccessWrite);
+}
+
+// __tsan_unaligned_read/write calls are emitted by compiler.
+
+void __tsan_unaligned_read16(const void *addr) {
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
+  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
+}
+
+void __tsan_unaligned_write16(void *addr) {
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
+  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
+}
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_get_current_fiber() {
+  return cur_thread();
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_create_fiber(unsigned flags) {
+  return FiberCreate(cur_thread(), CALLERPC, flags);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_destroy_fiber(void *fiber) {
+  FiberDestroy(cur_thread(), CALLERPC, static_cast<ThreadState *>(fiber));
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_switch_to_fiber(void *fiber, unsigned flags) {
+  FiberSwitch(cur_thread(), CALLERPC, static_cast<ThreadState *>(fiber), flags);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_set_fiber_name(void *fiber, const char *name) {
+  ThreadSetName(static_cast<ThreadState *>(fiber), name);
+}
+}  // extern "C"
+
+void __tsan_acquire(void *addr) {
+  Acquire(cur_thread(), CALLERPC, (uptr)addr);
+}
+
+void __tsan_release(void *addr) {
+  Release(cur_thread(), CALLERPC, (uptr)addr);
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface.h b/compiler-rt/lib/tsan/rtl-old/tsan_interface.h
new file mode 100644
index 0000000000000..711f064174c2c
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface.h
@@ -0,0 +1,424 @@
+//===-- tsan_interface.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_H
+#define TSAN_INTERFACE_H
+
+#include <sanitizer_common/sanitizer_internal_defs.h>
+using __sanitizer::uptr;
+using __sanitizer::tid_t;
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+
+// This function should be called at the very beginning of the process,
+// before any instrumented code is executed and before any call to malloc.
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_init();
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_flush_memory();
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read4(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read8(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read16(void *addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write1(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write2(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write4(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write8(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write16(void *addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read2(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read4(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read8(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read16(const void *addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write2(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write4(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write8(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write16(void *addr);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read4_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read8_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read16_pc(void *addr, void *pc);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write1_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write2_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write4_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write8_pc(void *addr, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_write16_pc(void *addr, void *pc);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_vptr_read(void **vptr_p);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_vptr_update(void **vptr_p, void *new_val);
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_func_entry(void *call_pc);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_func_exit();
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_ignore_thread_begin();
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_ignore_thread_end();
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_external_register_tag(const char *object_type);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_register_header(void *tag, const char *header);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_assign_tag(void *addr, void *tag);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_read(void *addr, void *caller_pc, void *tag);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_external_write(void *addr, void *caller_pc, void *tag);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_read_range(void *addr, unsigned long size);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_write_range(void *addr, unsigned long size);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_read_range_pc(void *addr, unsigned long size, void *pc);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_write_range_pc(void *addr, unsigned long size, void *pc);
+
+// User may provide function that would be called right when TSan detects
+// an error. The argument 'report' is an opaque pointer that can be used to
+// gather additional information using other TSan report API functions.
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_on_report(void *report);
+
+// If TSan is currently reporting a detected issue on the current thread,
+// returns an opaque pointer to the current report. Otherwise returns NULL.
+SANITIZER_INTERFACE_ATTRIBUTE
+void *__tsan_get_current_report();
+
+// Returns a report's description (issue type), number of duplicate issues
+// found, counts of array data (stack traces, memory operations, locations,
+// mutexes, threads, unique thread IDs) and a stack trace of a sleep() call (if
+// one was involved in the issue).
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_data(void *report, const char **description, int *count,
+                           int *stack_count, int *mop_count, int *loc_count,
+                           int *mutex_count, int *thread_count,
+                           int *unique_tid_count, void **sleep_trace,
+                           uptr trace_size);
+
+/// Retrieves the "tag" from a report (for external-race report types). External
+/// races can be associated with a tag which give them more meaning. For example
+/// tag value '1' means "Swift access race". Tag value '0' indicated a plain
+/// external race.
+///
+/// \param report opaque pointer to the current report (obtained as argument in
+///               __tsan_on_report, or from __tsan_get_current_report)
+/// \param [out] tag points to storage that will be filled with the tag value
+///
+/// \returns non-zero value on success, zero on failure
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_tag(void *report, uptr *tag);
+
+// Returns information about stack traces included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_stack(void *report, uptr idx, void **trace,
+                            uptr trace_size);
+
+// Returns information about memory operations included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_mop(void *report, uptr idx, int *tid, void **addr,
+                          int *size, int *write, int *atomic, void **trace,
+                          uptr trace_size);
+
+// Returns information about locations included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc(void *report, uptr idx, const char **type,
+                          void **addr, uptr *start, uptr *size, int *tid,
+                          int *fd, int *suppressable, void **trace,
+                          uptr trace_size);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_loc_object_type(void *report, uptr idx,
+                                      const char **object_type);
+
+// Returns information about mutexes included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_mutex(void *report, uptr idx, uptr *mutex_id, void **addr,
+                            int *destroyed, void **trace, uptr trace_size);
+
+// Returns information about threads included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_thread(void *report, uptr idx, int *tid, tid_t *os_id,
+                             int *running, const char **name, int *parent_tid,
+                             void **trace, uptr trace_size);
+
+// Returns information about unique thread IDs included in the report.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_report_unique_tid(void *report, uptr idx, int *tid);
+
+// Returns the type of the pointer (heap, stack, global, ...) and if possible
+// also the starting address (e.g. of a heap allocation) and size.
+SANITIZER_INTERFACE_ATTRIBUTE
+const char *__tsan_locate_address(uptr addr, char *name, uptr name_size,
+                                  uptr *region_address, uptr *region_size);
+
+// Returns the allocation stack for a heap pointer.
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_get_alloc_stack(uptr addr, uptr *trace, uptr size, int *thread_id,
+                           tid_t *os_id);
+
+#endif  // SANITIZER_GO
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+namespace __tsan {
+
+// These should match declarations from public tsan_interface_atomic.h header.
+typedef unsigned char      a8;
+typedef unsigned short a16;
+typedef unsigned int       a32;
+typedef unsigned long long a64;
+#if !SANITIZER_GO && (defined(__SIZEOF_INT128__) \
+    || (__clang_major__ * 100 + __clang_minor__ >= 302)) && \
+    !defined(__mips64) && !defined(__s390x__)
+__extension__ typedef __int128 a128;
+# define __TSAN_HAS_INT128 1
+#else
+# define __TSAN_HAS_INT128 0
+#endif
+
+// Part of ABI, do not change.
+// https://github.com/llvm/llvm-project/blob/main/libcxx/include/atomic
+typedef enum {
+  mo_relaxed,
+  mo_consume,
+  mo_acquire,
+  mo_release,
+  mo_acq_rel,
+  mo_seq_cst
+} morder;
+
+struct ThreadState;
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_load(const volatile a8 *a, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_load(const volatile a16 *a, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_load(const volatile a32 *a, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_load(const volatile a64 *a, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_load(const volatile a128 *a, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
+                                           morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
+                                            morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
+                                            morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
+                                            morder mo, morder fmo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
+                                             morder mo, morder fmo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v, morder mo,
+                                         morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
+                                          morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
+                                          morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
+                                          morder mo, morder fmo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
+                                           morder mo, morder fmo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v, morder mo,
+                                       morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
+                                         morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
+                                         morder mo, morder fmo);
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
+                                         morder mo, morder fmo);
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
+                                           morder mo, morder fmo);
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic_thread_fence(morder mo);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic_signal_fence(morder mo);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_compare_exchange(ThreadState *thr, uptr cpc, uptr pc,
+                                         u8 *a);
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_compare_exchange(ThreadState *thr, uptr cpc, uptr pc,
+                                         u8 *a);
+
+}  // extern "C"
+
+}  // namespace __tsan
+
+#endif  // TSAN_INTERFACE_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface.inc b/compiler-rt/lib/tsan/rtl-old/tsan_interface.inc
new file mode 100644
index 0000000000000..0031800e851f4
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface.inc
@@ -0,0 +1,182 @@
+//===-- tsan_interface.inc --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+void __tsan_read1(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessRead);
+}
+
+void __tsan_read2(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessRead);
+}
+
+void __tsan_read4(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
+}
+
+void __tsan_read8(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
+}
+
+void __tsan_write1(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
+}
+
+void __tsan_write2(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessWrite);
+}
+
+void __tsan_write4(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessWrite);
+}
+
+void __tsan_write8(void *addr) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
+}
+
+void __tsan_read1_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read2_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 2, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read4_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 4, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_read8_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 8, kAccessRead | kAccessExternalPC);
+}
+
+void __tsan_write1_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write2_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 2, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write4_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 4, kAccessWrite | kAccessExternalPC);
+}
+
+void __tsan_write8_pc(void *addr, void *pc) {
+  MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 8, kAccessWrite | kAccessExternalPC);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read2(const void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read4(const void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_read8(const void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessRead);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write2(void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 2, kAccessWrite);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write4(void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 4, kAccessWrite);
+}
+
+ALWAYS_INLINE USED void __tsan_unaligned_write8(void *addr) {
+  UnalignedMemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 8, kAccessWrite);
+}
+
+extern "C" {
+// __sanitizer_unaligned_load/store are for user instrumentation.
+SANITIZER_INTERFACE_ATTRIBUTE
+u16 __sanitizer_unaligned_load16(const uu16 *addr) {
+  __tsan_unaligned_read2(addr);
+  return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+u32 __sanitizer_unaligned_load32(const uu32 *addr) {
+  __tsan_unaligned_read4(addr);
+  return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+u64 __sanitizer_unaligned_load64(const uu64 *addr) {
+  __tsan_unaligned_read8(addr);
+  return *addr;
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store16(uu16 *addr, u16 v) {
+  *addr = v;
+  __tsan_unaligned_write2(addr);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store32(uu32 *addr, u32 v) {
+  *addr = v;
+  __tsan_unaligned_write4(addr);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_unaligned_store64(uu64 *addr, u64 v) {
+  *addr = v;
+  __tsan_unaligned_write8(addr);
+}
+}
+
+void __tsan_vptr_update(void **vptr_p, void *new_val) {
+  if (*vptr_p == new_val)
+    return;
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, sizeof(*vptr_p),
+               kAccessWrite | kAccessVptr);
+}
+
+void __tsan_vptr_read(void **vptr_p) {
+  MemoryAccess(cur_thread(), CALLERPC, (uptr)vptr_p, sizeof(*vptr_p),
+               kAccessRead | kAccessVptr);
+}
+
+void __tsan_func_entry(void *pc) { FuncEntry(cur_thread(), STRIP_PAC_PC(pc)); }
+
+void __tsan_func_exit() { FuncExit(cur_thread()); }
+
+void __tsan_ignore_thread_begin() { ThreadIgnoreBegin(cur_thread(), CALLERPC); }
+
+void __tsan_ignore_thread_end() { ThreadIgnoreEnd(cur_thread()); }
+
+void __tsan_read_range(void *addr, uptr size) {
+  MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, false);
+}
+
+void __tsan_write_range(void *addr, uptr size) {
+  MemoryAccessRange(cur_thread(), CALLERPC, (uptr)addr, size, true);
+}
+
+void __tsan_read_range_pc(void *addr, uptr size, void *pc) {
+  MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, false);
+}
+
+void __tsan_write_range_pc(void *addr, uptr size, void *pc) {
+  MemoryAccessRange(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, size, true);
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.cpp
new file mode 100644
index 0000000000000..6bd72e18d9425
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.cpp
@@ -0,0 +1,438 @@
+//===-- tsan_interface_ann.cpp --------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_vector.h"
+#include "tsan_interface_ann.h"
+#include "tsan_report.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+#include "tsan_platform.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+namespace __tsan {
+
+class ScopedAnnotation {
+ public:
+  ScopedAnnotation(ThreadState *thr, const char *aname, uptr pc)
+      : thr_(thr) {
+    FuncEntry(thr_, pc);
+    DPrintf("#%d: annotation %s()\n", thr_->tid, aname);
+  }
+
+  ~ScopedAnnotation() {
+    FuncExit(thr_);
+    CheckedMutex::CheckNoLocks();
+  }
+ private:
+  ThreadState *const thr_;
+};
+
+#define SCOPED_ANNOTATION_RET(typ, ret)                     \
+  if (!flags()->enable_annotations)                         \
+    return ret;                                             \
+  ThreadState *thr = cur_thread();                          \
+  const uptr caller_pc = (uptr)__builtin_return_address(0); \
+  ScopedAnnotation sa(thr, __func__, caller_pc);            \
+  const uptr pc = StackTrace::GetCurrentPc();               \
+  (void)pc;
+
+#define SCOPED_ANNOTATION(typ) SCOPED_ANNOTATION_RET(typ, )
+
+static const int kMaxDescLen = 128;
+
+struct ExpectRace {
+  ExpectRace *next;
+  ExpectRace *prev;
+  atomic_uintptr_t hitcount;
+  atomic_uintptr_t addcount;
+  uptr addr;
+  uptr size;
+  char *file;
+  int line;
+  char desc[kMaxDescLen];
+};
+
+struct DynamicAnnContext {
+  Mutex mtx;
+  ExpectRace benign;
+
+  DynamicAnnContext() : mtx(MutexTypeAnnotations) {}
+};
+
+static DynamicAnnContext *dyn_ann_ctx;
+static char dyn_ann_ctx_placeholder[sizeof(DynamicAnnContext)] ALIGNED(64);
+
+static void AddExpectRace(ExpectRace *list,
+    char *f, int l, uptr addr, uptr size, char *desc) {
+  ExpectRace *race = list->next;
+  for (; race != list; race = race->next) {
+    if (race->addr == addr && race->size == size) {
+      atomic_store_relaxed(&race->addcount,
+          atomic_load_relaxed(&race->addcount) + 1);
+      return;
+    }
+  }
+  race = static_cast<ExpectRace *>(Alloc(sizeof(ExpectRace)));
+  race->addr = addr;
+  race->size = size;
+  race->file = f;
+  race->line = l;
+  race->desc[0] = 0;
+  atomic_store_relaxed(&race->hitcount, 0);
+  atomic_store_relaxed(&race->addcount, 1);
+  if (desc) {
+    int i = 0;
+    for (; i < kMaxDescLen - 1 && desc[i]; i++)
+      race->desc[i] = desc[i];
+    race->desc[i] = 0;
+  }
+  race->prev = list;
+  race->next = list->next;
+  race->next->prev = race;
+  list->next = race;
+}
+
+static ExpectRace *FindRace(ExpectRace *list, uptr addr, uptr size) {
+  for (ExpectRace *race = list->next; race != list; race = race->next) {
+    uptr maxbegin = max(race->addr, addr);
+    uptr minend = min(race->addr + race->size, addr + size);
+    if (maxbegin < minend)
+      return race;
+  }
+  return 0;
+}
+
+static bool CheckContains(ExpectRace *list, uptr addr, uptr size) {
+  ExpectRace *race = FindRace(list, addr, size);
+  if (race == 0)
+    return false;
+  DPrintf("Hit expected/benign race: %s addr=%zx:%d %s:%d\n",
+      race->desc, race->addr, (int)race->size, race->file, race->line);
+  atomic_fetch_add(&race->hitcount, 1, memory_order_relaxed);
+  return true;
+}
+
+static void InitList(ExpectRace *list) {
+  list->next = list;
+  list->prev = list;
+}
+
+void InitializeDynamicAnnotations() {
+  dyn_ann_ctx = new(dyn_ann_ctx_placeholder) DynamicAnnContext;
+  InitList(&dyn_ann_ctx->benign);
+}
+
+bool IsExpectedReport(uptr addr, uptr size) {
+  ReadLock lock(&dyn_ann_ctx->mtx);
+  return CheckContains(&dyn_ann_ctx->benign, addr, size);
+}
+}  // namespace __tsan
+
+using namespace __tsan;
+
+extern "C" {
+void INTERFACE_ATTRIBUTE AnnotateHappensBefore(char *f, int l, uptr addr) {
+  SCOPED_ANNOTATION(AnnotateHappensBefore);
+  Release(thr, pc, addr);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateHappensAfter(char *f, int l, uptr addr) {
+  SCOPED_ANNOTATION(AnnotateHappensAfter);
+  Acquire(thr, pc, addr);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateCondVarSignal(char *f, int l, uptr cv) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateCondVarSignalAll(char *f, int l, uptr cv) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateMutexIsNotPHB(char *f, int l, uptr mu) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateCondVarWait(char *f, int l, uptr cv,
+                                             uptr lock) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateRWLockCreate(char *f, int l, uptr m) {
+  SCOPED_ANNOTATION(AnnotateRWLockCreate);
+  MutexCreate(thr, pc, m, MutexFlagWriteReentrant);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateRWLockCreateStatic(char *f, int l, uptr m) {
+  SCOPED_ANNOTATION(AnnotateRWLockCreateStatic);
+  MutexCreate(thr, pc, m, MutexFlagWriteReentrant | MutexFlagLinkerInit);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateRWLockDestroy(char *f, int l, uptr m) {
+  SCOPED_ANNOTATION(AnnotateRWLockDestroy);
+  MutexDestroy(thr, pc, m);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateRWLockAcquired(char *f, int l, uptr m,
+                                                uptr is_w) {
+  SCOPED_ANNOTATION(AnnotateRWLockAcquired);
+  if (is_w)
+    MutexPostLock(thr, pc, m, MutexFlagDoPreLockOnPostLock);
+  else
+    MutexPostReadLock(thr, pc, m, MutexFlagDoPreLockOnPostLock);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateRWLockReleased(char *f, int l, uptr m,
+                                                uptr is_w) {
+  SCOPED_ANNOTATION(AnnotateRWLockReleased);
+  if (is_w)
+    MutexUnlock(thr, pc, m);
+  else
+    MutexReadUnlock(thr, pc, m);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateTraceMemory(char *f, int l, uptr mem) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateFlushState(char *f, int l) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateNewMemory(char *f, int l, uptr mem,
+                                           uptr size) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateNoOp(char *f, int l, uptr mem) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateFlushExpectedRaces(char *f, int l) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateEnableRaceDetection(
+    char *f, int l, int enable) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateMutexIsUsedAsCondVar(
+    char *f, int l, uptr mu) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotatePCQGet(
+    char *f, int l, uptr pcq) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotatePCQPut(
+    char *f, int l, uptr pcq) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotatePCQDestroy(
+    char *f, int l, uptr pcq) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotatePCQCreate(
+    char *f, int l, uptr pcq) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateExpectRace(
+    char *f, int l, uptr mem, char *desc) {
+}
+
+static void BenignRaceImpl(char *f, int l, uptr mem, uptr size, char *desc) {
+  Lock lock(&dyn_ann_ctx->mtx);
+  AddExpectRace(&dyn_ann_ctx->benign,
+                f, l, mem, size, desc);
+  DPrintf("Add benign race: %s addr=%zx %s:%d\n", desc, mem, f, l);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateBenignRaceSized(
+    char *f, int l, uptr mem, uptr size, char *desc) {
+  SCOPED_ANNOTATION(AnnotateBenignRaceSized);
+  BenignRaceImpl(f, l, mem, size, desc);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateBenignRace(
+    char *f, int l, uptr mem, char *desc) {
+  SCOPED_ANNOTATION(AnnotateBenignRace);
+  BenignRaceImpl(f, l, mem, 1, desc);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsBegin(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreReadsBegin);
+  ThreadIgnoreBegin(thr, pc);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreReadsEnd(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreReadsEnd);
+  ThreadIgnoreEnd(thr);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesBegin(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreWritesBegin);
+  ThreadIgnoreBegin(thr, pc);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreWritesEnd(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreWritesEnd);
+  ThreadIgnoreEnd(thr);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreSyncBegin(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreSyncBegin);
+  ThreadIgnoreSyncBegin(thr, pc);
+}
+
+void INTERFACE_ATTRIBUTE AnnotateIgnoreSyncEnd(char *f, int l) {
+  SCOPED_ANNOTATION(AnnotateIgnoreSyncEnd);
+  ThreadIgnoreSyncEnd(thr);
+}
+
+void INTERFACE_ATTRIBUTE AnnotatePublishMemoryRange(
+    char *f, int l, uptr addr, uptr size) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateUnpublishMemoryRange(
+    char *f, int l, uptr addr, uptr size) {
+}
+
+void INTERFACE_ATTRIBUTE AnnotateThreadName(
+    char *f, int l, char *name) {
+  SCOPED_ANNOTATION(AnnotateThreadName);
+  ThreadSetName(thr, name);
+}
+
+// We deliberately omit the implementation of WTFAnnotateHappensBefore() and
+// WTFAnnotateHappensAfter(). Those are being used by Webkit to annotate
+// atomic operations, which should be handled by ThreadSanitizer correctly.
+void INTERFACE_ATTRIBUTE WTFAnnotateHappensBefore(char *f, int l, uptr addr) {
+}
+
+void INTERFACE_ATTRIBUTE WTFAnnotateHappensAfter(char *f, int l, uptr addr) {
+}
+
+void INTERFACE_ATTRIBUTE WTFAnnotateBenignRaceSized(
+    char *f, int l, uptr mem, uptr sz, char *desc) {
+  SCOPED_ANNOTATION(AnnotateBenignRaceSized);
+  BenignRaceImpl(f, l, mem, sz, desc);
+}
+
+int INTERFACE_ATTRIBUTE RunningOnValgrind() {
+  return flags()->running_on_valgrind;
+}
+
+double __attribute__((weak)) INTERFACE_ATTRIBUTE ValgrindSlowdown(void) {
+  return 10.0;
+}
+
+const char INTERFACE_ATTRIBUTE* ThreadSanitizerQuery(const char *query) {
+  if (internal_strcmp(query, "pure_happens_before") == 0)
+    return "1";
+  else
+    return "0";
+}
+
+void INTERFACE_ATTRIBUTE
+AnnotateMemoryIsInitialized(char *f, int l, uptr mem, uptr sz) {}
+void INTERFACE_ATTRIBUTE
+AnnotateMemoryIsUninitialized(char *f, int l, uptr mem, uptr sz) {}
+
+// Note: the parameter is called flagz, because flags is already taken
+// by the global function that returns flags.
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_create(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_create);
+  MutexCreate(thr, pc, (uptr)m, flagz & MutexCreationFlagMask);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_destroy(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_destroy);
+  MutexDestroy(thr, pc, (uptr)m, flagz);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_lock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_lock);
+  if (!(flagz & MutexFlagTryLock)) {
+    if (flagz & MutexFlagReadLock)
+      MutexPreReadLock(thr, pc, (uptr)m);
+    else
+      MutexPreLock(thr, pc, (uptr)m);
+  }
+  ThreadIgnoreBegin(thr, 0);
+  ThreadIgnoreSyncBegin(thr, 0);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_lock(void *m, unsigned flagz, int rec) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_lock);
+  ThreadIgnoreSyncEnd(thr);
+  ThreadIgnoreEnd(thr);
+  if (!(flagz & MutexFlagTryLockFailed)) {
+    if (flagz & MutexFlagReadLock)
+      MutexPostReadLock(thr, pc, (uptr)m, flagz);
+    else
+      MutexPostLock(thr, pc, (uptr)m, flagz, rec);
+  }
+}
+
+INTERFACE_ATTRIBUTE
+int __tsan_mutex_pre_unlock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION_RET(__tsan_mutex_pre_unlock, 0);
+  int ret = 0;
+  if (flagz & MutexFlagReadLock) {
+    CHECK(!(flagz & MutexFlagRecursiveUnlock));
+    MutexReadUnlock(thr, pc, (uptr)m);
+  } else {
+    ret = MutexUnlock(thr, pc, (uptr)m, flagz);
+  }
+  ThreadIgnoreBegin(thr, 0);
+  ThreadIgnoreSyncBegin(thr, 0);
+  return ret;
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_unlock(void *m, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_unlock);
+  ThreadIgnoreSyncEnd(thr);
+  ThreadIgnoreEnd(thr);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_signal(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_signal);
+  ThreadIgnoreBegin(thr, 0);
+  ThreadIgnoreSyncBegin(thr, 0);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_signal(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_signal);
+  ThreadIgnoreSyncEnd(thr);
+  ThreadIgnoreEnd(thr);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_pre_divert(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_pre_divert);
+  // Exit from ignore region started in __tsan_mutex_pre_lock/unlock/signal.
+  ThreadIgnoreSyncEnd(thr);
+  ThreadIgnoreEnd(thr);
+}
+
+INTERFACE_ATTRIBUTE
+void __tsan_mutex_post_divert(void *addr, unsigned flagz) {
+  SCOPED_ANNOTATION(__tsan_mutex_post_divert);
+  ThreadIgnoreBegin(thr, 0);
+  ThreadIgnoreSyncBegin(thr, 0);
+}
+}  // extern "C"

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h b/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h
new file mode 100644
index 0000000000000..458d61f53356e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface_ann.h
@@ -0,0 +1,32 @@
+//===-- tsan_interface_ann.h ------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interface for dynamic annotations.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_ANN_H
+#define TSAN_INTERFACE_ANN_H
+
+#include <sanitizer_common/sanitizer_internal_defs.h>
+
+// This header should NOT include any other headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_acquire(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_release(void *addr);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // TSAN_INTERFACE_ANN_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface_atomic.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interface_atomic.cpp
new file mode 100644
index 0000000000000..24ba3bb1f65df
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface_atomic.cpp
@@ -0,0 +1,920 @@
+//===-- tsan_interface_atomic.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+// ThreadSanitizer atomic operations are based on C++11/C1x standards.
+// For background see C++11 standard.  A slightly older, publicly
+// available draft of the standard (not entirely up-to-date, but close enough
+// for casual browsing) is available here:
+// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2011/n3242.pdf
+// The following page contains more background information:
+// http://www.hpl.hp.com/personal/Hans_Boehm/c++mm/
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_mutex.h"
+#include "tsan_flags.h"
+#include "tsan_interface.h"
+#include "tsan_rtl.h"
+
+using namespace __tsan;
+
+#if !SANITIZER_GO && __TSAN_HAS_INT128
+// Protects emulation of 128-bit atomic operations.
+static StaticSpinMutex mutex128;
+#endif
+
+#if SANITIZER_DEBUG
+static bool IsLoadOrder(morder mo) {
+  return mo == mo_relaxed || mo == mo_consume
+      || mo == mo_acquire || mo == mo_seq_cst;
+}
+
+static bool IsStoreOrder(morder mo) {
+  return mo == mo_relaxed || mo == mo_release || mo == mo_seq_cst;
+}
+#endif
+
+static bool IsReleaseOrder(morder mo) {
+  return mo == mo_release || mo == mo_acq_rel || mo == mo_seq_cst;
+}
+
+static bool IsAcquireOrder(morder mo) {
+  return mo == mo_consume || mo == mo_acquire
+      || mo == mo_acq_rel || mo == mo_seq_cst;
+}
+
+static bool IsAcqRelOrder(morder mo) {
+  return mo == mo_acq_rel || mo == mo_seq_cst;
+}
+
+template<typename T> T func_xchg(volatile T *v, T op) {
+  T res = __sync_lock_test_and_set(v, op);
+  // __sync_lock_test_and_set does not contain full barrier.
+  __sync_synchronize();
+  return res;
+}
+
+template<typename T> T func_add(volatile T *v, T op) {
+  return __sync_fetch_and_add(v, op);
+}
+
+template<typename T> T func_sub(volatile T *v, T op) {
+  return __sync_fetch_and_sub(v, op);
+}
+
+template<typename T> T func_and(volatile T *v, T op) {
+  return __sync_fetch_and_and(v, op);
+}
+
+template<typename T> T func_or(volatile T *v, T op) {
+  return __sync_fetch_and_or(v, op);
+}
+
+template<typename T> T func_xor(volatile T *v, T op) {
+  return __sync_fetch_and_xor(v, op);
+}
+
+template<typename T> T func_nand(volatile T *v, T op) {
+  // clang does not support __sync_fetch_and_nand.
+  T cmp = *v;
+  for (;;) {
+    T newv = ~(cmp & op);
+    T cur = __sync_val_compare_and_swap(v, cmp, newv);
+    if (cmp == cur)
+      return cmp;
+    cmp = cur;
+  }
+}
+
+template<typename T> T func_cas(volatile T *v, T cmp, T xch) {
+  return __sync_val_compare_and_swap(v, cmp, xch);
+}
+
+// clang does not support 128-bit atomic ops.
+// Atomic ops are executed under tsan internal mutex,
+// here we assume that the atomic variables are not accessed
+// from non-instrumented code.
+#if !defined(__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16) && !SANITIZER_GO \
+    && __TSAN_HAS_INT128
+a128 func_xchg(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = op;
+  return cmp;
+}
+
+a128 func_add(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = cmp + op;
+  return cmp;
+}
+
+a128 func_sub(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = cmp - op;
+  return cmp;
+}
+
+a128 func_and(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = cmp & op;
+  return cmp;
+}
+
+a128 func_or(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = cmp | op;
+  return cmp;
+}
+
+a128 func_xor(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = cmp ^ op;
+  return cmp;
+}
+
+a128 func_nand(volatile a128 *v, a128 op) {
+  SpinMutexLock lock(&mutex128);
+  a128 cmp = *v;
+  *v = ~(cmp & op);
+  return cmp;
+}
+
+a128 func_cas(volatile a128 *v, a128 cmp, a128 xch) {
+  SpinMutexLock lock(&mutex128);
+  a128 cur = *v;
+  if (cur == cmp)
+    *v = xch;
+  return cur;
+}
+#endif
+
+template <typename T>
+static int AccessSize() {
+  if (sizeof(T) <= 1)
+    return 1;
+  else if (sizeof(T) <= 2)
+    return 2;
+  else if (sizeof(T) <= 4)
+    return 4;
+  else
+    return 8;
+  // For 16-byte atomics we also use 8-byte memory access,
+  // this leads to false negatives only in very obscure cases.
+}
+
+#if !SANITIZER_GO
+static atomic_uint8_t *to_atomic(const volatile a8 *a) {
+  return reinterpret_cast<atomic_uint8_t *>(const_cast<a8 *>(a));
+}
+
+static atomic_uint16_t *to_atomic(const volatile a16 *a) {
+  return reinterpret_cast<atomic_uint16_t *>(const_cast<a16 *>(a));
+}
+#endif
+
+static atomic_uint32_t *to_atomic(const volatile a32 *a) {
+  return reinterpret_cast<atomic_uint32_t *>(const_cast<a32 *>(a));
+}
+
+static atomic_uint64_t *to_atomic(const volatile a64 *a) {
+  return reinterpret_cast<atomic_uint64_t *>(const_cast<a64 *>(a));
+}
+
+static memory_order to_mo(morder mo) {
+  switch (mo) {
+  case mo_relaxed: return memory_order_relaxed;
+  case mo_consume: return memory_order_consume;
+  case mo_acquire: return memory_order_acquire;
+  case mo_release: return memory_order_release;
+  case mo_acq_rel: return memory_order_acq_rel;
+  case mo_seq_cst: return memory_order_seq_cst;
+  }
+  DCHECK(0);
+  return memory_order_seq_cst;
+}
+
+template<typename T>
+static T NoTsanAtomicLoad(const volatile T *a, morder mo) {
+  return atomic_load(to_atomic(a), to_mo(mo));
+}
+
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
+static a128 NoTsanAtomicLoad(const volatile a128 *a, morder mo) {
+  SpinMutexLock lock(&mutex128);
+  return *a;
+}
+#endif
+
+template <typename T>
+static T AtomicLoad(ThreadState *thr, uptr pc, const volatile T *a, morder mo) {
+  DCHECK(IsLoadOrder(mo));
+  // This fast-path is critical for performance.
+  // Assume the access is atomic.
+  if (!IsAcquireOrder(mo)) {
+    MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(),
+                 kAccessRead | kAccessAtomic);
+    return NoTsanAtomicLoad(a, mo);
+  }
+  // Don't create sync object if it does not exist yet. For example, an atomic
+  // pointer is initialized to nullptr and then periodically acquire-loaded.
+  T v = NoTsanAtomicLoad(a, mo);
+  SyncVar *s = ctx->metamap.GetSyncIfExists((uptr)a);
+  if (s) {
+    ReadLock l(&s->mtx);
+    AcquireImpl(thr, pc, &s->clock);
+    // Re-read under sync mutex because we need a consistent snapshot
+    // of the value and the clock we acquire.
+    v = NoTsanAtomicLoad(a, mo);
+  }
+  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessRead | kAccessAtomic);
+  return v;
+}
+
+template<typename T>
+static void NoTsanAtomicStore(volatile T *a, T v, morder mo) {
+  atomic_store(to_atomic(a), v, to_mo(mo));
+}
+
+#if __TSAN_HAS_INT128 && !SANITIZER_GO
+static void NoTsanAtomicStore(volatile a128 *a, a128 v, morder mo) {
+  SpinMutexLock lock(&mutex128);
+  *a = v;
+}
+#endif
+
+template <typename T>
+static void AtomicStore(ThreadState *thr, uptr pc, volatile T *a, T v,
+                        morder mo) {
+  DCHECK(IsStoreOrder(mo));
+  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
+  // This fast-path is critical for performance.
+  // Assume the access is atomic.
+  // Strictly saying even relaxed store cuts off release sequence,
+  // so must reset the clock.
+  if (!IsReleaseOrder(mo)) {
+    NoTsanAtomicStore(a, v, mo);
+    return;
+  }
+  __sync_synchronize();
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+  Lock l(&s->mtx);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  ReleaseStoreImpl(thr, pc, &s->clock);
+  NoTsanAtomicStore(a, v, mo);
+}
+
+template <typename T, T (*F)(volatile T *v, T op)>
+static T AtomicRMW(ThreadState *thr, uptr pc, volatile T *a, T v, morder mo) {
+  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
+  if (LIKELY(mo == mo_relaxed))
+    return F(a, v);
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+  Lock l(&s->mtx);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  if (IsAcqRelOrder(mo))
+    AcquireReleaseImpl(thr, pc, &s->clock);
+  else if (IsReleaseOrder(mo))
+    ReleaseImpl(thr, pc, &s->clock);
+  else if (IsAcquireOrder(mo))
+    AcquireImpl(thr, pc, &s->clock);
+  return F(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicExchange(volatile T *a, T v, morder mo) {
+  return func_xchg(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchAdd(volatile T *a, T v, morder mo) {
+  return func_add(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchSub(volatile T *a, T v, morder mo) {
+  return func_sub(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchAnd(volatile T *a, T v, morder mo) {
+  return func_and(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchOr(volatile T *a, T v, morder mo) {
+  return func_or(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchXor(volatile T *a, T v, morder mo) {
+  return func_xor(a, v);
+}
+
+template<typename T>
+static T NoTsanAtomicFetchNand(volatile T *a, T v, morder mo) {
+  return func_nand(a, v);
+}
+
+template<typename T>
+static T AtomicExchange(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_xchg>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchAdd(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_add>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchSub(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_sub>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchAnd(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_and>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchOr(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_or>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchXor(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_xor>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static T AtomicFetchNand(ThreadState *thr, uptr pc, volatile T *a, T v,
+    morder mo) {
+  return AtomicRMW<T, func_nand>(thr, pc, a, v, mo);
+}
+
+template<typename T>
+static bool NoTsanAtomicCAS(volatile T *a, T *c, T v, morder mo, morder fmo) {
+  return atomic_compare_exchange_strong(to_atomic(a), c, v, to_mo(mo));
+}
+
+#if __TSAN_HAS_INT128
+static bool NoTsanAtomicCAS(volatile a128 *a, a128 *c, a128 v,
+    morder mo, morder fmo) {
+  a128 old = *c;
+  a128 cur = func_cas(a, old, v);
+  if (cur == old)
+    return true;
+  *c = cur;
+  return false;
+}
+#endif
+
+template<typename T>
+static T NoTsanAtomicCAS(volatile T *a, T c, T v, morder mo, morder fmo) {
+  NoTsanAtomicCAS(a, &c, v, mo, fmo);
+  return c;
+}
+
+template <typename T>
+static bool AtomicCAS(ThreadState *thr, uptr pc, volatile T *a, T *c, T v,
+                      morder mo, morder fmo) {
+  // 31.7.2.18: "The failure argument shall not be memory_order_release
+  // nor memory_order_acq_rel". LLVM (2021-05) fallbacks to Monotonic
+  // (mo_relaxed) when those are used.
+  DCHECK(IsLoadOrder(fmo));
+
+  MemoryAccess(thr, pc, (uptr)a, AccessSize<T>(), kAccessWrite | kAccessAtomic);
+  if (LIKELY(mo == mo_relaxed && fmo == mo_relaxed)) {
+    T cc = *c;
+    T pr = func_cas(a, cc, v);
+    if (pr == cc)
+      return true;
+    *c = pr;
+    return false;
+  }
+
+  bool release = IsReleaseOrder(mo);
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, (uptr)a, false);
+  RWLock l(&s->mtx, release);
+  T cc = *c;
+  T pr = func_cas(a, cc, v);
+  bool success = pr == cc;
+  if (!success) {
+    *c = pr;
+    mo = fmo;
+  }
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+
+  if (success && IsAcqRelOrder(mo))
+    AcquireReleaseImpl(thr, pc, &s->clock);
+  else if (success && IsReleaseOrder(mo))
+    ReleaseImpl(thr, pc, &s->clock);
+  else if (IsAcquireOrder(mo))
+    AcquireImpl(thr, pc, &s->clock);
+  return success;
+}
+
+template<typename T>
+static T AtomicCAS(ThreadState *thr, uptr pc,
+    volatile T *a, T c, T v, morder mo, morder fmo) {
+  AtomicCAS(thr, pc, a, &c, v, mo, fmo);
+  return c;
+}
+
+#if !SANITIZER_GO
+static void NoTsanAtomicFence(morder mo) {
+  __sync_synchronize();
+}
+
+static void AtomicFence(ThreadState *thr, uptr pc, morder mo) {
+  // FIXME(dvyukov): not implemented.
+  __sync_synchronize();
+}
+#endif
+
+// Interface functions follow.
+#if !SANITIZER_GO
+
+// C/C++
+
+static morder convert_morder(morder mo) {
+  if (flags()->force_seq_cst_atomics)
+    return (morder)mo_seq_cst;
+
+  // Filter out additional memory order flags:
+  // MEMMODEL_SYNC        = 1 << 15
+  // __ATOMIC_HLE_ACQUIRE = 1 << 16
+  // __ATOMIC_HLE_RELEASE = 1 << 17
+  //
+  // HLE is an optimization, and we pretend that elision always fails.
+  // MEMMODEL_SYNC is used when lowering __sync_ atomics,
+  // since we use __sync_ atomics for actual atomic operations,
+  // we can safely ignore it as well. It also subtly affects semantics,
+  // but we don't model the 
diff erence.
+  return (morder)(mo & 0x7fff);
+}
+
+#  define ATOMIC_IMPL(func, ...)                                \
+    ThreadState *const thr = cur_thread();                      \
+    ProcessPendingSignals(thr);                                 \
+    if (UNLIKELY(thr->ignore_sync || thr->ignore_interceptors)) \
+      return NoTsanAtomic##func(__VA_ARGS__);                   \
+    mo = convert_morder(mo);                                    \
+    return Atomic##func(thr, GET_CALLER_PC(), __VA_ARGS__);
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_load(const volatile a8 *a, morder mo) {
+  ATOMIC_IMPL(Load, a, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_load(const volatile a16 *a, morder mo) {
+  ATOMIC_IMPL(Load, a, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_load(const volatile a32 *a, morder mo) {
+  ATOMIC_IMPL(Load, a, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_load(const volatile a64 *a, morder mo) {
+  ATOMIC_IMPL(Load, a, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_load(const volatile a128 *a, morder mo) {
+  ATOMIC_IMPL(Load, a, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic8_store(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(Store, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic16_store(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(Store, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic32_store(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(Store, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic64_store(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(Store, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic128_store(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(Store, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_exchange(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(Exchange, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_exchange(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(Exchange, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_exchange(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(Exchange, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_exchange(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(Exchange, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_exchange(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(Exchange, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_add(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchAdd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_add(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchAdd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_add(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchAdd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_add(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchAdd, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_add(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchAdd, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_sub(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchSub, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_sub(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchSub, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_sub(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchSub, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_sub(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchSub, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_sub(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchSub, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_and(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchAnd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_and(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchAnd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_and(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchAnd, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_and(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchAnd, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_and(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchAnd, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_or(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchOr, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_or(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchOr, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_or(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchOr, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_or(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchOr, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_or(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchOr, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_xor(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchXor, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_xor(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchXor, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_xor(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchXor, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_xor(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchXor, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_xor(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchXor, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_fetch_nand(volatile a8 *a, a8 v, morder mo) {
+  ATOMIC_IMPL(FetchNand, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_fetch_nand(volatile a16 *a, a16 v, morder mo) {
+  ATOMIC_IMPL(FetchNand, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_fetch_nand(volatile a32 *a, a32 v, morder mo) {
+  ATOMIC_IMPL(FetchNand, a, v, mo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_fetch_nand(volatile a64 *a, a64 v, morder mo) {
+  ATOMIC_IMPL(FetchNand, a, v, mo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_fetch_nand(volatile a128 *a, a128 v, morder mo) {
+  ATOMIC_IMPL(FetchNand, a, v, mo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic8_compare_exchange_strong(volatile a8 *a, a8 *c, a8 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic16_compare_exchange_strong(volatile a16 *a, a16 *c, a16 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic32_compare_exchange_strong(volatile a32 *a, a32 *c, a32 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic64_compare_exchange_strong(volatile a64 *a, a64 *c, a64 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic128_compare_exchange_strong(volatile a128 *a, a128 *c, a128 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic8_compare_exchange_weak(volatile a8 *a, a8 *c, a8 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic16_compare_exchange_weak(volatile a16 *a, a16 *c, a16 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic32_compare_exchange_weak(volatile a32 *a, a32 *c, a32 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic64_compare_exchange_weak(volatile a64 *a, a64 *c, a64 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+int __tsan_atomic128_compare_exchange_weak(volatile a128 *a, a128 *c, a128 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a8 __tsan_atomic8_compare_exchange_val(volatile a8 *a, a8 c, a8 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a16 __tsan_atomic16_compare_exchange_val(volatile a16 *a, a16 c, a16 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a32 __tsan_atomic32_compare_exchange_val(volatile a32 *a, a32 c, a32 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+a64 __tsan_atomic64_compare_exchange_val(volatile a64 *a, a64 c, a64 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+
+#if __TSAN_HAS_INT128
+SANITIZER_INTERFACE_ATTRIBUTE
+a128 __tsan_atomic128_compare_exchange_val(volatile a128 *a, a128 c, a128 v,
+    morder mo, morder fmo) {
+  ATOMIC_IMPL(CAS, a, c, v, mo, fmo);
+}
+#endif
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic_thread_fence(morder mo) { ATOMIC_IMPL(Fence, mo); }
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_atomic_signal_fence(morder mo) {
+}
+}  // extern "C"
+
+#else  // #if !SANITIZER_GO
+
+// Go
+
+#  define ATOMIC(func, ...)               \
+    if (thr->ignore_sync) {               \
+      NoTsanAtomic##func(__VA_ARGS__);    \
+    } else {                              \
+      FuncEntry(thr, cpc);                \
+      Atomic##func(thr, pc, __VA_ARGS__); \
+      FuncExit(thr);                      \
+    }
+
+#  define ATOMIC_RET(func, ret, ...)              \
+    if (thr->ignore_sync) {                       \
+      (ret) = NoTsanAtomic##func(__VA_ARGS__);    \
+    } else {                                      \
+      FuncEntry(thr, cpc);                        \
+      (ret) = Atomic##func(thr, pc, __VA_ARGS__); \
+      FuncExit(thr);                              \
+    }
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(Load, *(a32*)(a+8), *(a32**)a, mo_acquire);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_load(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(Load, *(a64*)(a+8), *(a64**)a, mo_acquire);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC(Store, *(a32**)a, *(a32*)(a+8), mo_release);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_store(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC(Store, *(a64**)a, *(a64*)(a+8), mo_release);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchAdd, *(a32*)(a+16), *(a32**)a, *(a32*)(a+8), mo_acq_rel);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_fetch_add(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(FetchAdd, *(a64*)(a+16), *(a64**)a, *(a64*)(a+8), mo_acq_rel);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(Exchange, *(a32*)(a+16), *(a32**)a, *(a32*)(a+8), mo_acq_rel);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_exchange(ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  ATOMIC_RET(Exchange, *(a64*)(a+16), *(a64**)a, *(a64*)(a+8), mo_acq_rel);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic32_compare_exchange(
+    ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  a32 cur = 0;
+  a32 cmp = *(a32*)(a+8);
+  ATOMIC_RET(CAS, cur, *(a32**)a, cmp, *(a32*)(a+12), mo_acq_rel, mo_acquire);
+  *(bool*)(a+16) = (cur == cmp);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void __tsan_go_atomic64_compare_exchange(
+    ThreadState *thr, uptr cpc, uptr pc, u8 *a) {
+  a64 cur = 0;
+  a64 cmp = *(a64*)(a+8);
+  ATOMIC_RET(CAS, cur, *(a64**)a, cmp, *(a64*)(a+16), mo_acq_rel, mo_acquire);
+  *(bool*)(a+24) = (cur == cmp);
+}
+}  // extern "C"
+#endif  // #if !SANITIZER_GO

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.cpp
new file mode 100644
index 0000000000000..c090c1f08cbeb
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.cpp
@@ -0,0 +1,258 @@
+//===-- tsan_interface_java.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_interface_java.h"
+#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+
+using namespace __tsan;
+
+const jptr kHeapAlignment = 8;
+
+namespace __tsan {
+
+struct JavaContext {
+  const uptr heap_begin;
+  const uptr heap_size;
+
+  JavaContext(jptr heap_begin, jptr heap_size)
+      : heap_begin(heap_begin)
+      , heap_size(heap_size) {
+  }
+};
+
+static u64 jctx_buf[sizeof(JavaContext) / sizeof(u64) + 1];
+static JavaContext *jctx;
+
+MBlock *JavaHeapBlock(uptr addr, uptr *start) {
+  if (!jctx || addr < jctx->heap_begin ||
+      addr >= jctx->heap_begin + jctx->heap_size)
+    return nullptr;
+  for (uptr p = RoundDown(addr, kMetaShadowCell); p >= jctx->heap_begin;
+       p -= kMetaShadowCell) {
+    MBlock *b = ctx->metamap.GetBlock(p);
+    if (!b)
+      continue;
+    if (p + b->siz <= addr)
+      return nullptr;
+    *start = p;
+    return b;
+  }
+  return nullptr;
+}
+
+}  // namespace __tsan
+
+#define JAVA_FUNC_ENTER(func)      \
+  ThreadState *thr = cur_thread(); \
+  (void)thr;
+
+void __tsan_java_init(jptr heap_begin, jptr heap_size) {
+  JAVA_FUNC_ENTER(__tsan_java_init);
+  Initialize(thr);
+  DPrintf("#%d: java_init(0x%zx, 0x%zx)\n", thr->tid, heap_begin, heap_size);
+  DCHECK_EQ(jctx, 0);
+  DCHECK_GT(heap_begin, 0);
+  DCHECK_GT(heap_size, 0);
+  DCHECK_EQ(heap_begin % kHeapAlignment, 0);
+  DCHECK_EQ(heap_size % kHeapAlignment, 0);
+  DCHECK_LT(heap_begin, heap_begin + heap_size);
+  jctx = new(jctx_buf) JavaContext(heap_begin, heap_size);
+}
+
+int  __tsan_java_fini() {
+  JAVA_FUNC_ENTER(__tsan_java_fini);
+  DPrintf("#%d: java_fini()\n", thr->tid);
+  DCHECK_NE(jctx, 0);
+  // FIXME(dvyukov): this does not call atexit() callbacks.
+  int status = Finalize(thr);
+  DPrintf("#%d: java_fini() = %d\n", thr->tid, status);
+  return status;
+}
+
+void __tsan_java_alloc(jptr ptr, jptr size) {
+  JAVA_FUNC_ENTER(__tsan_java_alloc);
+  DPrintf("#%d: java_alloc(0x%zx, 0x%zx)\n", thr->tid, ptr, size);
+  DCHECK_NE(jctx, 0);
+  DCHECK_NE(size, 0);
+  DCHECK_EQ(ptr % kHeapAlignment, 0);
+  DCHECK_EQ(size % kHeapAlignment, 0);
+  DCHECK_GE(ptr, jctx->heap_begin);
+  DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+
+  OnUserAlloc(thr, 0, ptr, size, false);
+}
+
+void __tsan_java_free(jptr ptr, jptr size) {
+  JAVA_FUNC_ENTER(__tsan_java_free);
+  DPrintf("#%d: java_free(0x%zx, 0x%zx)\n", thr->tid, ptr, size);
+  DCHECK_NE(jctx, 0);
+  DCHECK_NE(size, 0);
+  DCHECK_EQ(ptr % kHeapAlignment, 0);
+  DCHECK_EQ(size % kHeapAlignment, 0);
+  DCHECK_GE(ptr, jctx->heap_begin);
+  DCHECK_LE(ptr + size, jctx->heap_begin + jctx->heap_size);
+
+  ctx->metamap.FreeRange(thr->proc(), ptr, size);
+}
+
+void __tsan_java_move(jptr src, jptr dst, jptr size) {
+  JAVA_FUNC_ENTER(__tsan_java_move);
+  DPrintf("#%d: java_move(0x%zx, 0x%zx, 0x%zx)\n", thr->tid, src, dst, size);
+  DCHECK_NE(jctx, 0);
+  DCHECK_NE(size, 0);
+  DCHECK_EQ(src % kHeapAlignment, 0);
+  DCHECK_EQ(dst % kHeapAlignment, 0);
+  DCHECK_EQ(size % kHeapAlignment, 0);
+  DCHECK_GE(src, jctx->heap_begin);
+  DCHECK_LE(src + size, jctx->heap_begin + jctx->heap_size);
+  DCHECK_GE(dst, jctx->heap_begin);
+  DCHECK_LE(dst + size, jctx->heap_begin + jctx->heap_size);
+  DCHECK_NE(dst, src);
+  DCHECK_NE(size, 0);
+
+  // Assuming it's not running concurrently with threads that do
+  // memory accesses and mutex operations (stop-the-world phase).
+  ctx->metamap.MoveMemory(src, dst, size);
+
+  // Clear the destination shadow range.
+  // We used to move shadow from src to dst, but the trace format does not
+  // support that anymore as it contains addresses of accesses.
+  RawShadow *d = MemToShadow(dst);
+  RawShadow *dend = MemToShadow(dst + size);
+  internal_memset(d, 0, (dend - d) * sizeof(*d));
+}
+
+jptr __tsan_java_find(jptr *from_ptr, jptr to) {
+  JAVA_FUNC_ENTER(__tsan_java_find);
+  DPrintf("#%d: java_find(&0x%zx, 0x%zx)\n", thr->tid, *from_ptr, to);
+  DCHECK_EQ((*from_ptr) % kHeapAlignment, 0);
+  DCHECK_EQ(to % kHeapAlignment, 0);
+  DCHECK_GE(*from_ptr, jctx->heap_begin);
+  DCHECK_LE(to, jctx->heap_begin + jctx->heap_size);
+  for (uptr from = *from_ptr; from < to; from += kHeapAlignment) {
+    MBlock *b = ctx->metamap.GetBlock(from);
+    if (b) {
+      *from_ptr = from;
+      return b->siz;
+    }
+  }
+  return 0;
+}
+
+void __tsan_java_finalize() {
+  JAVA_FUNC_ENTER(__tsan_java_finalize);
+  DPrintf("#%d: java_finalize()\n", thr->tid);
+  AcquireGlobal(thr);
+}
+
+void __tsan_java_mutex_lock(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_lock);
+  DPrintf("#%d: java_mutex_lock(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexPostLock(thr, 0, addr,
+                MutexFlagLinkerInit | MutexFlagWriteReentrant |
+                    MutexFlagDoPreLockOnPostLock);
+}
+
+void __tsan_java_mutex_unlock(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_unlock);
+  DPrintf("#%d: java_mutex_unlock(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexUnlock(thr, 0, addr);
+}
+
+void __tsan_java_mutex_read_lock(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_read_lock);
+  DPrintf("#%d: java_mutex_read_lock(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexPostReadLock(thr, 0, addr,
+                    MutexFlagLinkerInit | MutexFlagWriteReentrant |
+                        MutexFlagDoPreLockOnPostLock);
+}
+
+void __tsan_java_mutex_read_unlock(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_read_unlock);
+  DPrintf("#%d: java_mutex_read_unlock(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  MutexReadUnlock(thr, 0, addr);
+}
+
+void __tsan_java_mutex_lock_rec(jptr addr, int rec) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_lock_rec);
+  DPrintf("#%d: java_mutex_lock_rec(0x%zx, %d)\n", thr->tid, addr, rec);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+  DCHECK_GT(rec, 0);
+
+  MutexPostLock(thr, 0, addr,
+                MutexFlagLinkerInit | MutexFlagWriteReentrant |
+                    MutexFlagDoPreLockOnPostLock | MutexFlagRecursiveLock,
+                rec);
+}
+
+int __tsan_java_mutex_unlock_rec(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_mutex_unlock_rec);
+  DPrintf("#%d: java_mutex_unlock_rec(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  return MutexUnlock(thr, 0, addr, MutexFlagRecursiveUnlock);
+}
+
+void __tsan_java_acquire(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_acquire);
+  DPrintf("#%d: java_acquire(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  Acquire(thr, 0, addr);
+}
+
+void __tsan_java_release(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_release);
+  DPrintf("#%d: java_release(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  Release(thr, 0, addr);
+}
+
+void __tsan_java_release_store(jptr addr) {
+  JAVA_FUNC_ENTER(__tsan_java_release);
+  DPrintf("#%d: java_release_store(0x%zx)\n", thr->tid, addr);
+  DCHECK_NE(jctx, 0);
+  DCHECK_GE(addr, jctx->heap_begin);
+  DCHECK_LT(addr, jctx->heap_begin + jctx->heap_size);
+
+  ReleaseStore(thr, 0, addr);
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.h b/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.h
new file mode 100644
index 0000000000000..51b445251e09b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_interface_java.h
@@ -0,0 +1,99 @@
+//===-- tsan_interface_java.h -----------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interface for verification of Java or mixed Java/C++ programs.
+// The interface is intended to be used from within a JVM and notify TSan
+// about such events like Java locks and GC memory compaction.
+//
+// For plain memory accesses and function entry/exit a JVM is intended to use
+// C++ interfaces: __tsan_readN/writeN and __tsan_func_enter/exit.
+//
+// For volatile memory accesses and atomic operations JVM is intended to use
+// standard atomics API: __tsan_atomicN_load/store/etc.
+//
+// For usage examples see lit_tests/java_*.cpp
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_JAVA_H
+#define TSAN_INTERFACE_JAVA_H
+
+#ifndef INTERFACE_ATTRIBUTE
+# define INTERFACE_ATTRIBUTE __attribute__((visibility("default")))
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef unsigned long jptr;
+
+// Must be called before any other callback from Java.
+void __tsan_java_init(jptr heap_begin, jptr heap_size) INTERFACE_ATTRIBUTE;
+// Must be called when the application exits.
+// Not necessary the last callback (concurrently running threads are OK).
+// Returns exit status or 0 if tsan does not want to override it.
+int  __tsan_java_fini() INTERFACE_ATTRIBUTE;
+
+// Callback for memory allocations.
+// May be omitted for allocations that are not subject to data races
+// nor contain synchronization objects (e.g. String).
+void __tsan_java_alloc(jptr ptr, jptr size) INTERFACE_ATTRIBUTE;
+// Callback for memory free.
+// Can be aggregated for several objects (preferably).
+void __tsan_java_free(jptr ptr, jptr size) INTERFACE_ATTRIBUTE;
+// Callback for memory move by GC.
+// Can be aggregated for several objects (preferably).
+// The ranges can overlap.
+void __tsan_java_move(jptr src, jptr dst, jptr size) INTERFACE_ATTRIBUTE;
+// This function must be called on the finalizer thread
+// before executing a batch of finalizers.
+// It ensures necessary synchronization between
+// java object creation and finalization.
+void __tsan_java_finalize() INTERFACE_ATTRIBUTE;
+// Finds the first allocated memory block in the [*from_ptr, to) range, saves
+// its address in *from_ptr and returns its size. Returns 0 if there are no
+// allocated memory blocks in the range.
+jptr __tsan_java_find(jptr *from_ptr, jptr to) INTERFACE_ATTRIBUTE;
+
+// Mutex lock.
+// Addr is any unique address associated with the mutex.
+// Can be called on recursive reentry.
+void __tsan_java_mutex_lock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex unlock.
+void __tsan_java_mutex_unlock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex read lock.
+void __tsan_java_mutex_read_lock(jptr addr) INTERFACE_ATTRIBUTE;
+// Mutex read unlock.
+void __tsan_java_mutex_read_unlock(jptr addr) INTERFACE_ATTRIBUTE;
+// Recursive mutex lock, intended for handling of Object.wait().
+// The 'rec' value must be obtained from the previous
+// __tsan_java_mutex_unlock_rec().
+void __tsan_java_mutex_lock_rec(jptr addr, int rec) INTERFACE_ATTRIBUTE;
+// Recursive mutex unlock, intended for handling of Object.wait().
+// The return value says how many times this thread called lock()
+// w/o a pairing unlock() (i.e. how many recursive levels it unlocked).
+// It must be passed back to __tsan_java_mutex_lock_rec() to restore
+// the same recursion level.
+int __tsan_java_mutex_unlock_rec(jptr addr) INTERFACE_ATTRIBUTE;
+
+// Raw acquire/release primitives.
+// Can be used to establish happens-before edges on volatile/final fields,
+// in atomic operations, etc. release_store is the same as release, but it
+// breaks release sequence on addr (see C++ standard 1.10/7 for details).
+void __tsan_java_acquire(jptr addr) INTERFACE_ATTRIBUTE;
+void __tsan_java_release(jptr addr) INTERFACE_ATTRIBUTE;
+void __tsan_java_release_store(jptr addr) INTERFACE_ATTRIBUTE;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#undef INTERFACE_ATTRIBUTE
+
+#endif  // #ifndef TSAN_INTERFACE_JAVA_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp
new file mode 100644
index 0000000000000..0e861bf1f9625
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_malloc_mac.cpp
@@ -0,0 +1,71 @@
+//===-- tsan_malloc_mac.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific malloc interception.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "sanitizer_common/sanitizer_errno.h"
+#include "tsan_interceptors.h"
+#include "tsan_stack_trace.h"
+
+using namespace __tsan;
+#define COMMON_MALLOC_ZONE_NAME "tsan"
+#define COMMON_MALLOC_ENTER()
+#define COMMON_MALLOC_SANITIZER_INITIALIZED (cur_thread()->is_inited)
+#define COMMON_MALLOC_FORCE_LOCK()
+#define COMMON_MALLOC_FORCE_UNLOCK()
+#define COMMON_MALLOC_MEMALIGN(alignment, size) \
+  void *p =                                     \
+      user_memalign(cur_thread(), StackTrace::GetCurrentPc(), alignment, size)
+#define COMMON_MALLOC_MALLOC(size)                             \
+  if (in_symbolizer()) return InternalAlloc(size);             \
+  SCOPED_INTERCEPTOR_RAW(malloc, size);                        \
+  void *p = user_alloc(thr, pc, size)
+#define COMMON_MALLOC_REALLOC(ptr, size)                              \
+  if (in_symbolizer()) return InternalRealloc(ptr, size);             \
+  SCOPED_INTERCEPTOR_RAW(realloc, ptr, size);                         \
+  void *p = user_realloc(thr, pc, ptr, size)
+#define COMMON_MALLOC_CALLOC(count, size)                              \
+  if (in_symbolizer()) return InternalCalloc(count, size);             \
+  SCOPED_INTERCEPTOR_RAW(calloc, size, count);                         \
+  void *p = user_calloc(thr, pc, size, count)
+#define COMMON_MALLOC_POSIX_MEMALIGN(memptr, alignment, size)      \
+  if (in_symbolizer()) {                                           \
+    void *p = InternalAlloc(size, nullptr, alignment);             \
+    if (!p) return errno_ENOMEM;                                   \
+    *memptr = p;                                                   \
+    return 0;                                                      \
+  }                                                                \
+  SCOPED_INTERCEPTOR_RAW(posix_memalign, memptr, alignment, size); \
+  int res = user_posix_memalign(thr, pc, memptr, alignment, size);
+#define COMMON_MALLOC_VALLOC(size)                            \
+  if (in_symbolizer())                                        \
+    return InternalAlloc(size, nullptr, GetPageSizeCached()); \
+  SCOPED_INTERCEPTOR_RAW(valloc, size);                       \
+  void *p = user_valloc(thr, pc, size)
+#define COMMON_MALLOC_FREE(ptr)                              \
+  if (in_symbolizer()) return InternalFree(ptr);             \
+  SCOPED_INTERCEPTOR_RAW(free, ptr);                         \
+  user_free(thr, pc, ptr)
+#define COMMON_MALLOC_SIZE(ptr) uptr size = user_alloc_usable_size(ptr);
+#define COMMON_MALLOC_FILL_STATS(zone, stats)
+#define COMMON_MALLOC_REPORT_UNKNOWN_REALLOC(ptr, zone_ptr, zone_name) \
+  (void)zone_name; \
+  Report("mz_realloc(%p) -- attempting to realloc unallocated memory.\n", ptr);
+#define COMMON_MALLOC_NAMESPACE __tsan
+#define COMMON_MALLOC_HAS_ZONE_ENUMERATOR 0
+#define COMMON_MALLOC_HAS_EXTRA_INTROSPECTION_INIT 0
+
+#include "sanitizer_common/sanitizer_malloc_mac.inc"
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_md5.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_md5.cpp
new file mode 100644
index 0000000000000..72857b773fed6
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_md5.cpp
@@ -0,0 +1,250 @@
+//===-- tsan_md5.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+#define F(x, y, z)      ((z) ^ ((x) & ((y) ^ (z))))
+#define G(x, y, z)      ((y) ^ ((z) & ((x) ^ (y))))
+#define H(x, y, z)      ((x) ^ (y) ^ (z))
+#define I(x, y, z)      ((y) ^ ((x) | ~(z)))
+
+#define STEP(f, a, b, c, d, x, t, s) \
+  (a) += f((b), (c), (d)) + (x) + (t); \
+  (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
+  (a) += (b);
+
+#define SET(n) \
+  (*(const MD5_u32plus *)&ptr[(n) * 4])
+#define GET(n) \
+  SET(n)
+
+typedef unsigned int MD5_u32plus;
+typedef unsigned long ulong_t;
+
+typedef struct {
+  MD5_u32plus lo, hi;
+  MD5_u32plus a, b, c, d;
+  unsigned char buffer[64];
+  MD5_u32plus block[16];
+} MD5_CTX;
+
+static const void *body(MD5_CTX *ctx, const void *data, ulong_t size) {
+  const unsigned char *ptr = (const unsigned char *)data;
+  MD5_u32plus a, b, c, d;
+  MD5_u32plus saved_a, saved_b, saved_c, saved_d;
+
+  a = ctx->a;
+  b = ctx->b;
+  c = ctx->c;
+  d = ctx->d;
+
+  do {
+    saved_a = a;
+    saved_b = b;
+    saved_c = c;
+    saved_d = d;
+
+    STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
+    STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
+    STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
+    STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
+    STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
+    STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
+    STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
+    STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
+    STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
+    STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
+    STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
+    STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
+    STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
+    STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
+    STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
+    STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
+
+    STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
+    STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
+    STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
+    STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
+    STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
+    STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
+    STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
+    STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
+    STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
+    STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
+    STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
+    STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
+    STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
+    STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
+    STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
+    STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
+
+    STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
+    STEP(H, d, a, b, c, GET(8), 0x8771f681, 11)
+    STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
+    STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23)
+    STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
+    STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11)
+    STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
+    STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23)
+    STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
+    STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11)
+    STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
+    STEP(H, b, c, d, a, GET(6), 0x04881d05, 23)
+    STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
+    STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11)
+    STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
+    STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23)
+
+    STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
+    STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
+    STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
+    STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
+    STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
+    STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
+    STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
+    STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
+    STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
+    STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
+    STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
+    STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
+    STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
+    STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
+    STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
+    STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
+
+    a += saved_a;
+    b += saved_b;
+    c += saved_c;
+    d += saved_d;
+
+    ptr += 64;
+  } while (size -= 64);
+
+  ctx->a = a;
+  ctx->b = b;
+  ctx->c = c;
+  ctx->d = d;
+
+  return ptr;
+}
+
+#undef F
+#undef G
+#undef H
+#undef I
+#undef STEP
+#undef SET
+#undef GET
+
+void MD5_Init(MD5_CTX *ctx) {
+  ctx->a = 0x67452301;
+  ctx->b = 0xefcdab89;
+  ctx->c = 0x98badcfe;
+  ctx->d = 0x10325476;
+
+  ctx->lo = 0;
+  ctx->hi = 0;
+}
+
+void MD5_Update(MD5_CTX *ctx, const void *data, ulong_t size) {
+  MD5_u32plus saved_lo;
+  ulong_t used, free;
+
+  saved_lo = ctx->lo;
+  if ((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo)
+    ctx->hi++;
+  ctx->hi += size >> 29;
+
+  used = saved_lo & 0x3f;
+
+  if (used) {
+    free = 64 - used;
+
+    if (size < free) {
+      internal_memcpy(&ctx->buffer[used], data, size);
+      return;
+    }
+
+    internal_memcpy(&ctx->buffer[used], data, free);
+    data = (const unsigned char *)data + free;
+    size -= free;
+    body(ctx, ctx->buffer, 64);
+  }
+
+  if (size >= 64) {
+    data = body(ctx, data, size & ~(ulong_t)0x3f);
+    size &= 0x3f;
+  }
+
+  internal_memcpy(ctx->buffer, data, size);
+}
+
+void MD5_Final(unsigned char *result, MD5_CTX *ctx) {
+  ulong_t used, free;
+
+  used = ctx->lo & 0x3f;
+
+  ctx->buffer[used++] = 0x80;
+
+  free = 64 - used;
+
+  if (free < 8) {
+    internal_memset(&ctx->buffer[used], 0, free);
+    body(ctx, ctx->buffer, 64);
+    used = 0;
+    free = 64;
+  }
+
+  internal_memset(&ctx->buffer[used], 0, free - 8);
+
+  ctx->lo <<= 3;
+  ctx->buffer[56] = ctx->lo;
+  ctx->buffer[57] = ctx->lo >> 8;
+  ctx->buffer[58] = ctx->lo >> 16;
+  ctx->buffer[59] = ctx->lo >> 24;
+  ctx->buffer[60] = ctx->hi;
+  ctx->buffer[61] = ctx->hi >> 8;
+  ctx->buffer[62] = ctx->hi >> 16;
+  ctx->buffer[63] = ctx->hi >> 24;
+
+  body(ctx, ctx->buffer, 64);
+
+  result[0] = ctx->a;
+  result[1] = ctx->a >> 8;
+  result[2] = ctx->a >> 16;
+  result[3] = ctx->a >> 24;
+  result[4] = ctx->b;
+  result[5] = ctx->b >> 8;
+  result[6] = ctx->b >> 16;
+  result[7] = ctx->b >> 24;
+  result[8] = ctx->c;
+  result[9] = ctx->c >> 8;
+  result[10] = ctx->c >> 16;
+  result[11] = ctx->c >> 24;
+  result[12] = ctx->d;
+  result[13] = ctx->d >> 8;
+  result[14] = ctx->d >> 16;
+  result[15] = ctx->d >> 24;
+
+  internal_memset(ctx, 0, sizeof(*ctx));
+}
+
+MD5Hash md5_hash(const void *data, uptr size) {
+  MD5Hash res;
+  MD5_CTX ctx;
+  MD5_Init(&ctx);
+  MD5_Update(&ctx, data, size);
+  MD5_Final((unsigned char*)&res.hash[0], &ctx);
+  return res;
+}
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_mman.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_mman.cpp
new file mode 100644
index 0000000000000..75044c38d5d23
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_mman.cpp
@@ -0,0 +1,436 @@
+//===-- tsan_mman.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_allocator_checks.h"
+#include "sanitizer_common/sanitizer_allocator_interface.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_errno.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_mman.h"
+#include "tsan_rtl.h"
+#include "tsan_report.h"
+#include "tsan_flags.h"
+
+// May be overriden by front-end.
+SANITIZER_WEAK_DEFAULT_IMPL
+void __sanitizer_malloc_hook(void *ptr, uptr size) {
+  (void)ptr;
+  (void)size;
+}
+
+SANITIZER_WEAK_DEFAULT_IMPL
+void __sanitizer_free_hook(void *ptr) {
+  (void)ptr;
+}
+
+namespace __tsan {
+
+struct MapUnmapCallback {
+  void OnMap(uptr p, uptr size) const { }
+  void OnUnmap(uptr p, uptr size) const {
+    // We are about to unmap a chunk of user memory.
+    // Mark the corresponding shadow memory as not needed.
+    DontNeedShadowFor(p, size);
+    // Mark the corresponding meta shadow memory as not needed.
+    // Note the block does not contain any meta info at this point
+    // (this happens after free).
+    const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
+    const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
+    // Block came from LargeMmapAllocator, so must be large.
+    // We rely on this in the calculations below.
+    CHECK_GE(size, 2 * kPageSize);
+    uptr 
diff  = RoundUp(p, kPageSize) - p;
+    if (
diff  != 0) {
+      p += 
diff ;
+      size -= 
diff ;
+    }
+    
diff  = p + size - RoundDown(p + size, kPageSize);
+    if (
diff  != 0)
+      size -= 
diff ;
+    uptr p_meta = (uptr)MemToMeta(p);
+    ReleaseMemoryPagesToOS(p_meta, p_meta + size / kMetaRatio);
+  }
+};
+
+static char allocator_placeholder[sizeof(Allocator)] ALIGNED(64);
+Allocator *allocator() {
+  return reinterpret_cast<Allocator*>(&allocator_placeholder);
+}
+
+struct GlobalProc {
+  Mutex mtx;
+  Processor *proc;
+  // This mutex represents the internal allocator combined for
+  // the purposes of deadlock detection. The internal allocator
+  // uses multiple mutexes, moreover they are locked only occasionally
+  // and they are spin mutexes which don't support deadlock detection.
+  // So we use this fake mutex to serve as a substitute for these mutexes.
+  CheckedMutex internal_alloc_mtx;
+
+  GlobalProc()
+      : mtx(MutexTypeGlobalProc),
+        proc(ProcCreate()),
+        internal_alloc_mtx(MutexTypeInternalAlloc) {}
+};
+
+static char global_proc_placeholder[sizeof(GlobalProc)] ALIGNED(64);
+GlobalProc *global_proc() {
+  return reinterpret_cast<GlobalProc*>(&global_proc_placeholder);
+}
+
+static void InternalAllocAccess() {
+  global_proc()->internal_alloc_mtx.Lock();
+  global_proc()->internal_alloc_mtx.Unlock();
+}
+
+ScopedGlobalProcessor::ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc())
+    return;
+  // If we don't have a proc, use the global one.
+  // There are currently only two known case where this path is triggered:
+  //   __interceptor_free
+  //   __nptl_deallocate_tsd
+  //   start_thread
+  //   clone
+  // and:
+  //   ResetRange
+  //   __interceptor_munmap
+  //   __deallocate_stack
+  //   start_thread
+  //   clone
+  // Ideally, we destroy thread state (and unwire proc) when a thread actually
+  // exits (i.e. when we join/wait it). Then we would not need the global proc
+  gp->mtx.Lock();
+  ProcWire(gp->proc, thr);
+}
+
+ScopedGlobalProcessor::~ScopedGlobalProcessor() {
+  GlobalProc *gp = global_proc();
+  ThreadState *thr = cur_thread();
+  if (thr->proc() != gp->proc)
+    return;
+  ProcUnwire(gp->proc, thr);
+  gp->mtx.Unlock();
+}
+
+void AllocatorLock() NO_THREAD_SAFETY_ANALYSIS {
+  global_proc()->mtx.Lock();
+  global_proc()->internal_alloc_mtx.Lock();
+  InternalAllocatorLock();
+}
+
+void AllocatorUnlock() NO_THREAD_SAFETY_ANALYSIS {
+  InternalAllocatorUnlock();
+  global_proc()->internal_alloc_mtx.Unlock();
+  global_proc()->mtx.Unlock();
+}
+
+static constexpr uptr kMaxAllowedMallocSize = 1ull << 40;
+static uptr max_user_defined_malloc_size;
+
+void InitializeAllocator() {
+  SetAllocatorMayReturnNull(common_flags()->allocator_may_return_null);
+  allocator()->Init(common_flags()->allocator_release_to_os_interval_ms);
+  max_user_defined_malloc_size = common_flags()->max_allocation_size_mb
+                                     ? common_flags()->max_allocation_size_mb
+                                           << 20
+                                     : kMaxAllowedMallocSize;
+}
+
+void InitializeAllocatorLate() {
+  new(global_proc()) GlobalProc();
+}
+
+void AllocatorProcStart(Processor *proc) {
+  allocator()->InitCache(&proc->alloc_cache);
+  internal_allocator()->InitCache(&proc->internal_alloc_cache);
+}
+
+void AllocatorProcFinish(Processor *proc) {
+  allocator()->DestroyCache(&proc->alloc_cache);
+  internal_allocator()->DestroyCache(&proc->internal_alloc_cache);
+}
+
+void AllocatorPrintStats() {
+  allocator()->PrintStats();
+}
+
+static void SignalUnsafeCall(ThreadState *thr, uptr pc) {
+  if (atomic_load_relaxed(&thr->in_signal_handler) == 0 ||
+      !ShouldReport(thr, ReportTypeSignalUnsafe))
+    return;
+  VarSizeStackTrace stack;
+  ObtainCurrentStack(thr, pc, &stack);
+  if (IsFiredSuppression(ctx, ReportTypeSignalUnsafe, stack))
+    return;
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ScopedReport rep(ReportTypeSignalUnsafe);
+  rep.AddStack(stack, true);
+  OutputReport(thr, rep);
+}
+
+
+void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz, uptr align,
+                          bool signal) {
+  if (sz >= kMaxAllowedMallocSize || align >= kMaxAllowedMallocSize ||
+      sz > max_user_defined_malloc_size) {
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    uptr malloc_limit =
+        Min(kMaxAllowedMallocSize, max_user_defined_malloc_size);
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportAllocationSizeTooBig(sz, malloc_limit, &stack);
+  }
+  if (UNLIKELY(IsRssLimitExceeded())) {
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportRssLimitExceeded(&stack);
+  }
+  void *p = allocator()->Allocate(&thr->proc()->alloc_cache, sz, align);
+  if (UNLIKELY(!p)) {
+    SetAllocatorOutOfMemory();
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportOutOfMemory(sz, &stack);
+  }
+  if (ctx && ctx->initialized)
+    OnUserAlloc(thr, pc, (uptr)p, sz, true);
+  if (signal)
+    SignalUnsafeCall(thr, pc);
+  return p;
+}
+
+void user_free(ThreadState *thr, uptr pc, void *p, bool signal) {
+  ScopedGlobalProcessor sgp;
+  if (ctx && ctx->initialized)
+    OnUserFree(thr, pc, (uptr)p, true);
+  allocator()->Deallocate(&thr->proc()->alloc_cache, p);
+  if (signal)
+    SignalUnsafeCall(thr, pc);
+}
+
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz) {
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, kDefaultAlignment));
+}
+
+void *user_calloc(ThreadState *thr, uptr pc, uptr size, uptr n) {
+  if (UNLIKELY(CheckForCallocOverflow(size, n))) {
+    if (AllocatorMayReturnNull())
+      return SetErrnoOnNull(nullptr);
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportCallocOverflow(n, size, &stack);
+  }
+  void *p = user_alloc_internal(thr, pc, n * size);
+  if (p)
+    internal_memset(p, 0, n * size);
+  return SetErrnoOnNull(p);
+}
+
+void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr size, uptr n) {
+  if (UNLIKELY(CheckForCallocOverflow(size, n))) {
+    if (AllocatorMayReturnNull())
+      return SetErrnoOnNull(nullptr);
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportReallocArrayOverflow(size, n, &stack);
+  }
+  return user_realloc(thr, pc, p, size * n);
+}
+
+void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write) {
+  DPrintf("#%d: alloc(%zu) = 0x%zx\n", thr->tid, sz, p);
+  ctx->metamap.AllocBlock(thr, pc, p, sz);
+  if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
+    MemoryRangeImitateWrite(thr, pc, (uptr)p, sz);
+  else
+    MemoryResetRange(thr, pc, (uptr)p, sz);
+}
+
+void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write) {
+  CHECK_NE(p, (void*)0);
+  uptr sz = ctx->metamap.FreeBlock(thr->proc(), p);
+  DPrintf("#%d: free(0x%zx, %zu)\n", thr->tid, p, sz);
+  if (write && thr->ignore_reads_and_writes == 0 && thr->is_inited)
+    MemoryRangeFreed(thr, pc, (uptr)p, sz);
+}
+
+void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz) {
+  // FIXME: Handle "shrinking" more efficiently,
+  // it seems that some software actually does this.
+  if (!p)
+    return SetErrnoOnNull(user_alloc_internal(thr, pc, sz));
+  if (!sz) {
+    user_free(thr, pc, p);
+    return nullptr;
+  }
+  void *new_p = user_alloc_internal(thr, pc, sz);
+  if (new_p) {
+    uptr old_sz = user_alloc_usable_size(p);
+    internal_memcpy(new_p, p, min(old_sz, sz));
+    user_free(thr, pc, p);
+  }
+  return SetErrnoOnNull(new_p);
+}
+
+void *user_memalign(ThreadState *thr, uptr pc, uptr align, uptr sz) {
+  if (UNLIKELY(!IsPowerOfTwo(align))) {
+    errno = errno_EINVAL;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportInvalidAllocationAlignment(align, &stack);
+  }
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, align));
+}
+
+int user_posix_memalign(ThreadState *thr, uptr pc, void **memptr, uptr align,
+                        uptr sz) {
+  if (UNLIKELY(!CheckPosixMemalignAlignment(align))) {
+    if (AllocatorMayReturnNull())
+      return errno_EINVAL;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportInvalidPosixMemalignAlignment(align, &stack);
+  }
+  void *ptr = user_alloc_internal(thr, pc, sz, align);
+  if (UNLIKELY(!ptr))
+    // OOM error is already taken care of by user_alloc_internal.
+    return errno_ENOMEM;
+  CHECK(IsAligned((uptr)ptr, align));
+  *memptr = ptr;
+  return 0;
+}
+
+void *user_aligned_alloc(ThreadState *thr, uptr pc, uptr align, uptr sz) {
+  if (UNLIKELY(!CheckAlignedAllocAlignmentAndSize(align, sz))) {
+    errno = errno_EINVAL;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportInvalidAlignedAllocAlignment(sz, align, &stack);
+  }
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, align));
+}
+
+void *user_valloc(ThreadState *thr, uptr pc, uptr sz) {
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, GetPageSizeCached()));
+}
+
+void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz) {
+  uptr PageSize = GetPageSizeCached();
+  if (UNLIKELY(CheckForPvallocOverflow(sz, PageSize))) {
+    errno = errno_ENOMEM;
+    if (AllocatorMayReturnNull())
+      return nullptr;
+    GET_STACK_TRACE_FATAL(thr, pc);
+    ReportPvallocOverflow(sz, &stack);
+  }
+  // pvalloc(0) should allocate one page.
+  sz = sz ? RoundUpTo(sz, PageSize) : PageSize;
+  return SetErrnoOnNull(user_alloc_internal(thr, pc, sz, PageSize));
+}
+
+uptr user_alloc_usable_size(const void *p) {
+  if (p == 0)
+    return 0;
+  MBlock *b = ctx->metamap.GetBlock((uptr)p);
+  if (!b)
+    return 0;  // Not a valid pointer.
+  if (b->siz == 0)
+    return 1;  // Zero-sized allocations are actually 1 byte.
+  return b->siz;
+}
+
+void invoke_malloc_hook(void *ptr, uptr size) {
+  ThreadState *thr = cur_thread();
+  if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
+    return;
+  __sanitizer_malloc_hook(ptr, size);
+  RunMallocHooks(ptr, size);
+}
+
+void invoke_free_hook(void *ptr) {
+  ThreadState *thr = cur_thread();
+  if (ctx == 0 || !ctx->initialized || thr->ignore_interceptors)
+    return;
+  __sanitizer_free_hook(ptr);
+  RunFreeHooks(ptr);
+}
+
+void *Alloc(uptr sz) {
+  ThreadState *thr = cur_thread();
+  if (thr->nomalloc) {
+    thr->nomalloc = 0;  // CHECK calls internal_malloc().
+    CHECK(0);
+  }
+  InternalAllocAccess();
+  return InternalAlloc(sz, &thr->proc()->internal_alloc_cache);
+}
+
+void FreeImpl(void *p) {
+  ThreadState *thr = cur_thread();
+  if (thr->nomalloc) {
+    thr->nomalloc = 0;  // CHECK calls internal_malloc().
+    CHECK(0);
+  }
+  InternalAllocAccess();
+  InternalFree(p, &thr->proc()->internal_alloc_cache);
+}
+
+}  // namespace __tsan
+
+using namespace __tsan;
+
+extern "C" {
+uptr __sanitizer_get_current_allocated_bytes() {
+  uptr stats[AllocatorStatCount];
+  allocator()->GetStats(stats);
+  return stats[AllocatorStatAllocated];
+}
+
+uptr __sanitizer_get_heap_size() {
+  uptr stats[AllocatorStatCount];
+  allocator()->GetStats(stats);
+  return stats[AllocatorStatMapped];
+}
+
+uptr __sanitizer_get_free_bytes() {
+  return 1;
+}
+
+uptr __sanitizer_get_unmapped_bytes() {
+  return 1;
+}
+
+uptr __sanitizer_get_estimated_allocated_size(uptr size) {
+  return size;
+}
+
+int __sanitizer_get_ownership(const void *p) {
+  return allocator()->GetBlockBegin(p) != 0;
+}
+
+uptr __sanitizer_get_allocated_size(const void *p) {
+  return user_alloc_usable_size(p);
+}
+
+void __tsan_on_thread_idle() {
+  ThreadState *thr = cur_thread();
+  thr->clock.ResetCached(&thr->proc()->clock_cache);
+  thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
+  allocator()->SwallowCache(&thr->proc()->alloc_cache);
+  internal_allocator()->SwallowCache(&thr->proc()->internal_alloc_cache);
+  ctx->metamap.OnProcIdle(thr->proc());
+}
+}  // extern "C"

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_mman.h b/compiler-rt/lib/tsan/rtl-old/tsan_mman.h
new file mode 100644
index 0000000000000..db8488eabbe28
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_mman.h
@@ -0,0 +1,78 @@
+//===-- tsan_mman.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_MMAN_H
+#define TSAN_MMAN_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+const uptr kDefaultAlignment = 16;
+
+void InitializeAllocator();
+void InitializeAllocatorLate();
+void ReplaceSystemMalloc();
+void AllocatorProcStart(Processor *proc);
+void AllocatorProcFinish(Processor *proc);
+void AllocatorPrintStats();
+void AllocatorLock();
+void AllocatorUnlock();
+
+// For user allocations.
+void *user_alloc_internal(ThreadState *thr, uptr pc, uptr sz,
+                          uptr align = kDefaultAlignment, bool signal = true);
+// Does not accept NULL.
+void user_free(ThreadState *thr, uptr pc, void *p, bool signal = true);
+// Interceptor implementations.
+void *user_alloc(ThreadState *thr, uptr pc, uptr sz);
+void *user_calloc(ThreadState *thr, uptr pc, uptr sz, uptr n);
+void *user_realloc(ThreadState *thr, uptr pc, void *p, uptr sz);
+void *user_reallocarray(ThreadState *thr, uptr pc, void *p, uptr sz, uptr n);
+void *user_memalign(ThreadState *thr, uptr pc, uptr align, uptr sz);
+int user_posix_memalign(ThreadState *thr, uptr pc, void **memptr, uptr align,
+                        uptr sz);
+void *user_aligned_alloc(ThreadState *thr, uptr pc, uptr align, uptr sz);
+void *user_valloc(ThreadState *thr, uptr pc, uptr sz);
+void *user_pvalloc(ThreadState *thr, uptr pc, uptr sz);
+uptr user_alloc_usable_size(const void *p);
+
+// Invoking malloc/free hooks that may be installed by the user.
+void invoke_malloc_hook(void *ptr, uptr size);
+void invoke_free_hook(void *ptr);
+
+// For internal data structures.
+void *Alloc(uptr sz);
+void FreeImpl(void *p);
+
+template <typename T, typename... Args>
+T *New(Args &&...args) {
+  return new (Alloc(sizeof(T))) T(static_cast<Args &&>(args)...);
+}
+
+template <typename T>
+void Free(T *&p) {
+  if (p == nullptr)
+    return;
+  FreeImpl(p);
+  p = nullptr;
+}
+
+template <typename T>
+void DestroyAndFree(T *&p) {
+  if (p == nullptr)
+    return;
+  p->~T();
+  Free(p);
+}
+
+}  // namespace __tsan
+#endif  // TSAN_MMAN_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.cpp
new file mode 100644
index 0000000000000..735179686ba95
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.cpp
@@ -0,0 +1,132 @@
+//===-- tsan_mutexset.cpp -------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_mutexset.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+MutexSet::MutexSet() {
+}
+
+void MutexSet::Add(u64 id, bool write, u64 epoch) {
+  // Look up existing mutex with the same id.
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      descs_[i].count++;
+      descs_[i].epoch = epoch;
+      return;
+    }
+  }
+  // On overflow, find the oldest mutex and drop it.
+  if (size_ == kMaxSize) {
+    u64 minepoch = (u64)-1;
+    u64 mini = (u64)-1;
+    for (uptr i = 0; i < size_; i++) {
+      if (descs_[i].epoch < minepoch) {
+        minepoch = descs_[i].epoch;
+        mini = i;
+      }
+    }
+    RemovePos(mini);
+    CHECK_EQ(size_, kMaxSize - 1);
+  }
+  // Add new mutex descriptor.
+  descs_[size_].addr = 0;
+  descs_[size_].stack_id = kInvalidStackID;
+  descs_[size_].id = id;
+  descs_[size_].write = write;
+  descs_[size_].epoch = epoch;
+  descs_[size_].seq = seq_++;
+  descs_[size_].count = 1;
+  size_++;
+}
+
+void MutexSet::Del(u64 id, bool write) {
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      if (--descs_[i].count == 0)
+        RemovePos(i);
+      return;
+    }
+  }
+}
+
+void MutexSet::Remove(u64 id) {
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].id == id) {
+      RemovePos(i);
+      return;
+    }
+  }
+}
+
+void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {
+  // Look up existing mutex with the same id.
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].addr == addr) {
+      descs_[i].count++;
+      descs_[i].seq = seq_++;
+      return;
+    }
+  }
+  // On overflow, find the oldest mutex and drop it.
+  if (size_ == kMaxSize) {
+    uptr min = 0;
+    for (uptr i = 0; i < size_; i++) {
+      if (descs_[i].seq < descs_[min].seq)
+        min = i;
+    }
+    RemovePos(min);
+    CHECK_EQ(size_, kMaxSize - 1);
+  }
+  // Add new mutex descriptor.
+  descs_[size_].addr = addr;
+  descs_[size_].stack_id = stack_id;
+  descs_[size_].id = 0;
+  descs_[size_].write = write;
+  descs_[size_].epoch = 0;
+  descs_[size_].seq = seq_++;
+  descs_[size_].count = 1;
+  size_++;
+}
+
+void MutexSet::DelAddr(uptr addr, bool destroy) {
+  for (uptr i = 0; i < size_; i++) {
+    if (descs_[i].addr == addr) {
+      if (destroy || --descs_[i].count == 0)
+        RemovePos(i);
+      return;
+    }
+  }
+}
+
+void MutexSet::RemovePos(uptr i) {
+  CHECK_LT(i, size_);
+  descs_[i] = descs_[size_ - 1];
+  size_--;
+}
+
+uptr MutexSet::Size() const {
+  return size_;
+}
+
+MutexSet::Desc MutexSet::Get(uptr i) const {
+  CHECK_LT(i, size_);
+  return descs_[i];
+}
+
+DynamicMutexSet::DynamicMutexSet() : ptr_(New<MutexSet>()) {}
+DynamicMutexSet::~DynamicMutexSet() { DestroyAndFree(ptr_); }
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h b/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h
new file mode 100644
index 0000000000000..93776a6641351
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_mutexset.h
@@ -0,0 +1,98 @@
+//===-- tsan_mutexset.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// MutexSet holds the set of mutexes currently held by a thread.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_MUTEXSET_H
+#define TSAN_MUTEXSET_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+class MutexSet {
+ public:
+  // Holds limited number of mutexes.
+  // The oldest mutexes are discarded on overflow.
+  static constexpr uptr kMaxSize = 16;
+  struct Desc {
+    uptr addr;
+    StackID stack_id;
+    u64 id;
+    u64 epoch;
+    u32 seq;
+    u32 count;
+    bool write;
+
+    Desc() { internal_memset(this, 0, sizeof(*this)); }
+    Desc(const Desc& other) { *this = other; }
+    Desc& operator=(const MutexSet::Desc& other) {
+      internal_memcpy(this, &other, sizeof(*this));
+      return *this;
+    }
+  };
+
+  MutexSet();
+  // The 'id' is obtained from SyncVar::GetId().
+  void Add(u64 id, bool write, u64 epoch);
+  void Del(u64 id, bool write);
+  void Remove(u64 id);  // Removes the mutex completely (if it's destroyed).
+  void AddAddr(uptr addr, StackID stack_id, bool write);
+  void DelAddr(uptr addr, bool destroy = false);
+  uptr Size() const;
+  Desc Get(uptr i) const;
+
+ private:
+#if !SANITIZER_GO
+  u32 seq_ = 0;
+  uptr size_ = 0;
+  Desc descs_[kMaxSize];
+
+  void RemovePos(uptr i);
+#endif
+};
+
+// MutexSet is too large to live on stack.
+// DynamicMutexSet can be use used to create local MutexSet's.
+class DynamicMutexSet {
+ public:
+  DynamicMutexSet();
+  ~DynamicMutexSet();
+  MutexSet* operator->() { return ptr_; }
+  operator MutexSet*() { return ptr_; }
+  DynamicMutexSet(const DynamicMutexSet&) = delete;
+  DynamicMutexSet& operator=(const DynamicMutexSet&) = delete;
+
+ private:
+  MutexSet* ptr_;
+#if SANITIZER_GO
+  MutexSet set_;
+#endif
+};
+
+// Go does not have mutexes, so do not spend memory and time.
+// (Go sync.Mutex is actually a semaphore -- can be unlocked
+// in 
diff erent goroutine).
+#if SANITIZER_GO
+MutexSet::MutexSet() {}
+void MutexSet::Add(u64 id, bool write, u64 epoch) {}
+void MutexSet::Del(u64 id, bool write) {}
+void MutexSet::Remove(u64 id) {}
+void MutexSet::AddAddr(uptr addr, StackID stack_id, bool write) {}
+void MutexSet::DelAddr(uptr addr, bool destroy) {}
+uptr MutexSet::Size() const { return 0; }
+MutexSet::Desc MutexSet::Get(uptr i) const { return Desc(); }
+DynamicMutexSet::DynamicMutexSet() : ptr_(&set_) {}
+DynamicMutexSet::~DynamicMutexSet() {}
+#endif
+
+}  // namespace __tsan
+
+#endif  // TSAN_MUTEXSET_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_new_delete.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_new_delete.cpp
new file mode 100644
index 0000000000000..fc44a5221b5b0
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_new_delete.cpp
@@ -0,0 +1,199 @@
+//===-- tsan_new_delete.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Interceptors for operators new and delete.
+//===----------------------------------------------------------------------===//
+#include "interception/interception.h"
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_report.h"
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "tsan_interceptors.h"
+#include "tsan_rtl.h"
+
+using namespace __tsan;
+
+namespace std {
+struct nothrow_t {};
+enum class align_val_t: __sanitizer::uptr {};
+}  // namespace std
+
+DECLARE_REAL(void *, malloc, uptr size)
+DECLARE_REAL(void, free, void *ptr)
+
+// TODO(alekseys): throw std::bad_alloc instead of dying on OOM.
+#define OPERATOR_NEW_BODY(mangled_name, nothrow) \
+  if (in_symbolizer()) \
+    return InternalAlloc(size); \
+  void *p = 0; \
+  {  \
+    SCOPED_INTERCEPTOR_RAW(mangled_name, size); \
+    p = user_alloc(thr, pc, size); \
+    if (!nothrow && UNLIKELY(!p)) { \
+      GET_STACK_TRACE_FATAL(thr, pc); \
+      ReportOutOfMemory(size, &stack); \
+    } \
+  }  \
+  invoke_malloc_hook(p, size);  \
+  return p;
+
+#define OPERATOR_NEW_BODY_ALIGN(mangled_name, nothrow) \
+  if (in_symbolizer()) \
+    return InternalAlloc(size, nullptr, (uptr)align); \
+  void *p = 0; \
+  {  \
+    SCOPED_INTERCEPTOR_RAW(mangled_name, size); \
+    p = user_memalign(thr, pc, (uptr)align, size); \
+    if (!nothrow && UNLIKELY(!p)) { \
+      GET_STACK_TRACE_FATAL(thr, pc); \
+      ReportOutOfMemory(size, &stack); \
+    } \
+  }  \
+  invoke_malloc_hook(p, size);  \
+  return p;
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new(__sanitizer::uptr size);
+void *operator new(__sanitizer::uptr size) {
+  OPERATOR_NEW_BODY(_Znwm, false /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new[](__sanitizer::uptr size);
+void *operator new[](__sanitizer::uptr size) {
+  OPERATOR_NEW_BODY(_Znam, false /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new(__sanitizer::uptr size, std::nothrow_t const&);
+void *operator new(__sanitizer::uptr size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(_ZnwmRKSt9nothrow_t, true /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new[](__sanitizer::uptr size, std::nothrow_t const&);
+void *operator new[](__sanitizer::uptr size, std::nothrow_t const&) {
+  OPERATOR_NEW_BODY(_ZnamRKSt9nothrow_t, true /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new(__sanitizer::uptr size, std::align_val_t align);
+void *operator new(__sanitizer::uptr size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(_ZnwmSt11align_val_t, false /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new[](__sanitizer::uptr size, std::align_val_t align);
+void *operator new[](__sanitizer::uptr size, std::align_val_t align) {
+  OPERATOR_NEW_BODY_ALIGN(_ZnamSt11align_val_t, false /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new(__sanitizer::uptr size, std::align_val_t align,
+                   std::nothrow_t const&);
+void *operator new(__sanitizer::uptr size, std::align_val_t align,
+                   std::nothrow_t const&) {
+  OPERATOR_NEW_BODY_ALIGN(_ZnwmSt11align_val_tRKSt9nothrow_t,
+                          true /*nothrow*/);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void *operator new[](__sanitizer::uptr size, std::align_val_t align,
+                     std::nothrow_t const&);
+void *operator new[](__sanitizer::uptr size, std::align_val_t align,
+                     std::nothrow_t const&) {
+  OPERATOR_NEW_BODY_ALIGN(_ZnamSt11align_val_tRKSt9nothrow_t,
+                          true /*nothrow*/);
+}
+
+#define OPERATOR_DELETE_BODY(mangled_name) \
+  if (ptr == 0) return;  \
+  if (in_symbolizer()) \
+    return InternalFree(ptr); \
+  invoke_free_hook(ptr);  \
+  SCOPED_INTERCEPTOR_RAW(mangled_name, ptr);  \
+  user_free(thr, pc, ptr);
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr) NOEXCEPT;
+void operator delete(void *ptr) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdlPv);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr) NOEXCEPT;
+void operator delete[](void *ptr) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdaPv);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr, std::nothrow_t const&);
+void operator delete(void *ptr, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(_ZdlPvRKSt9nothrow_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr, std::nothrow_t const&);
+void operator delete[](void *ptr, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(_ZdaPvRKSt9nothrow_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr, __sanitizer::uptr size) NOEXCEPT;
+void operator delete(void *ptr, __sanitizer::uptr size) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdlPvm);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr, __sanitizer::uptr size) NOEXCEPT;
+void operator delete[](void *ptr, __sanitizer::uptr size) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdaPvm);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align) NOEXCEPT;
+void operator delete(void *ptr, std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdlPvSt11align_val_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT;
+void operator delete[](void *ptr, std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdaPvSt11align_val_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr, std::align_val_t align, std::nothrow_t const&);
+void operator delete(void *ptr, std::align_val_t align, std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(_ZdlPvSt11align_val_tRKSt9nothrow_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr, std::align_val_t align,
+                       std::nothrow_t const&);
+void operator delete[](void *ptr, std::align_val_t align,
+                       std::nothrow_t const&) {
+  OPERATOR_DELETE_BODY(_ZdaPvSt11align_val_tRKSt9nothrow_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete(void *ptr, __sanitizer::uptr size,
+                     std::align_val_t align) NOEXCEPT;
+void operator delete(void *ptr, __sanitizer::uptr size,
+                     std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdlPvmSt11align_val_t);
+}
+
+SANITIZER_INTERFACE_ATTRIBUTE
+void operator delete[](void *ptr, __sanitizer::uptr size,
+                       std::align_val_t align) NOEXCEPT;
+void operator delete[](void *ptr, __sanitizer::uptr size,
+                       std::align_val_t align) NOEXCEPT {
+  OPERATOR_DELETE_BODY(_ZdaPvmSt11align_val_t);
+}

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform.h b/compiler-rt/lib/tsan/rtl-old/tsan_platform.h
new file mode 100644
index 0000000000000..7ff0acace8f6d
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform.h
@@ -0,0 +1,988 @@
+//===-- tsan_platform.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Platform-specific code.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_PLATFORM_H
+#define TSAN_PLATFORM_H
+
+#if !defined(__LP64__) && !defined(_WIN64)
+# error "Only 64-bit is supported"
+#endif
+
+#include "tsan_defs.h"
+#include "tsan_trace.h"
+
+namespace __tsan {
+
+enum {
+  // App memory is not mapped onto shadow memory range.
+  kBrokenMapping = 1 << 0,
+  // Mapping app memory and back does not produce the same address,
+  // this can lead to wrong addresses in reports and potentially
+  // other bad consequences.
+  kBrokenReverseMapping = 1 << 1,
+  // Mapping is non-linear for linear user range.
+  // This is bad and can lead to unpredictable memory corruptions, etc
+  // because range access functions assume linearity.
+  kBrokenLinearity = 1 << 2,
+};
+
+/*
+C/C++ on linux/x86_64 and freebsd/x86_64
+0000 0000 1000 - 0080 0000 0000: main binary and/or MAP_32BIT mappings (512GB)
+0040 0000 0000 - 0100 0000 0000: -
+0100 0000 0000 - 2000 0000 0000: shadow
+2000 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 5500 0000 0000: -
+5500 0000 0000 - 5680 0000 0000: pie binaries without ASLR or on 4.1+ kernels
+5680 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 7d00 0000 0000: -
+7b00 0000 0000 - 7c00 0000 0000: heap
+7c00 0000 0000 - 7e80 0000 0000: -
+7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
+
+C/C++ on netbsd/amd64 can reuse the same mapping:
+ * The address space starts from 0x1000 (option with 0x0) and ends with
+   0x7f7ffffff000.
+ * LoAppMem-kHeapMemEnd can be reused as it is.
+ * No VDSO support.
+ * No MidAppMem region.
+ * No additional HeapMem region.
+ * HiAppMem contains the stack, loader, shared libraries and heap.
+ * Stack on NetBSD/amd64 has prereserved 128MB.
+ * Heap grows downwards (top-down).
+ * ASLR must be disabled per-process or globally.
+*/
+struct Mapping48AddressSpace {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x340000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x200000000000ull;
+  static const uptr kHeapMemBeg    = 0x7b0000000000ull;
+  static const uptr kHeapMemEnd    = 0x7c0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x008000000000ull;
+  static const uptr kMidAppMemBeg  = 0x550000000000ull;
+  static const uptr kMidAppMemEnd  = 0x568000000000ull;
+  static const uptr kHiAppMemBeg   = 0x7e8000000000ull;
+  static const uptr kHiAppMemEnd   = 0x800000000000ull;
+  static const uptr kShadowMsk = 0x780000000000ull;
+  static const uptr kShadowXor = 0x040000000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg       = 0xf000000000000000ull;
+};
+
+/*
+C/C++ on linux/mips64 (40-bit VMA)
+0000 0000 00 - 0100 0000 00: -                                           (4 GB)
+0100 0000 00 - 0200 0000 00: main binary                                 (4 GB)
+0200 0000 00 - 2000 0000 00: -                                         (120 GB)
+2000 0000 00 - 4000 0000 00: shadow                                    (128 GB)
+4000 0000 00 - 5000 0000 00: metainfo (memory blocks and sync objects)  (64 GB)
+5000 0000 00 - aa00 0000 00: -                                         (360 GB)
+aa00 0000 00 - ab00 0000 00: main binary (PIE)                           (4 GB)
+ab00 0000 00 - b000 0000 00: -                                          (20 GB)
+b000 0000 00 - b200 0000 00: traces                                      (8 GB)
+b200 0000 00 - fe00 0000 00: -                                         (304 GB)
+fe00 0000 00 - ff00 0000 00: heap                                        (4 GB)
+ff00 0000 00 - ff80 0000 00: -                                           (2 GB)
+ff80 0000 00 - ffff ffff ff: modules and main thread stack              (<2 GB)
+*/
+struct MappingMips64_40 {
+  static const uptr kMetaShadowBeg = 0x4000000000ull;
+  static const uptr kMetaShadowEnd = 0x5000000000ull;
+  static const uptr kTraceMemBeg   = 0xb000000000ull;
+  static const uptr kTraceMemEnd   = 0xb200000000ull;
+  static const uptr kShadowBeg     = 0x2000000000ull;
+  static const uptr kShadowEnd     = 0x4000000000ull;
+  static const uptr kHeapMemBeg    = 0xfe00000000ull;
+  static const uptr kHeapMemEnd    = 0xff00000000ull;
+  static const uptr kLoAppMemBeg   = 0x0100000000ull;
+  static const uptr kLoAppMemEnd   = 0x0200000000ull;
+  static const uptr kMidAppMemBeg  = 0xaa00000000ull;
+  static const uptr kMidAppMemEnd  = 0xab00000000ull;
+  static const uptr kHiAppMemBeg   = 0xff80000000ull;
+  static const uptr kHiAppMemEnd   = 0xffffffffffull;
+  static const uptr kShadowMsk = 0xf800000000ull;
+  static const uptr kShadowXor = 0x0800000000ull;
+  static const uptr kShadowAdd = 0x0000000000ull;
+  static const uptr kVdsoBeg       = 0xfffff00000ull;
+};
+
+/*
+C/C++ on Darwin/iOS/ARM64 (36-bit VMA, 64 GB VM)
+0000 0000 00 - 0100 0000 00: -                                    (4 GB)
+0100 0000 00 - 0200 0000 00: main binary, modules, thread stacks  (4 GB)
+0200 0000 00 - 0300 0000 00: heap                                 (4 GB)
+0300 0000 00 - 0400 0000 00: -                                    (4 GB)
+0400 0000 00 - 0c00 0000 00: shadow memory                       (32 GB)
+0c00 0000 00 - 0d00 0000 00: -                                    (4 GB)
+0d00 0000 00 - 0e00 0000 00: metainfo                             (4 GB)
+0e00 0000 00 - 0f00 0000 00: -                                    (4 GB)
+0f00 0000 00 - 0fc0 0000 00: traces                               (3 GB)
+0fc0 0000 00 - 1000 0000 00: -
+*/
+struct MappingAppleAarch64 {
+  static const uptr kLoAppMemBeg   = 0x0100000000ull;
+  static const uptr kLoAppMemEnd   = 0x0200000000ull;
+  static const uptr kHeapMemBeg    = 0x0200000000ull;
+  static const uptr kHeapMemEnd    = 0x0300000000ull;
+  static const uptr kShadowBeg     = 0x0400000000ull;
+  static const uptr kShadowEnd     = 0x0c00000000ull;
+  static const uptr kMetaShadowBeg = 0x0d00000000ull;
+  static const uptr kMetaShadowEnd = 0x0e00000000ull;
+  static const uptr kTraceMemBeg   = 0x0f00000000ull;
+  static const uptr kTraceMemEnd   = 0x0fc0000000ull;
+  static const uptr kHiAppMemBeg   = 0x0fc0000000ull;
+  static const uptr kHiAppMemEnd   = 0x0fc0000000ull;
+  static const uptr kShadowMsk = 0x0ull;
+  static const uptr kShadowXor = 0x0ull;
+  static const uptr kShadowAdd = 0x0ull;
+  static const uptr kVdsoBeg       = 0x7000000000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+};
+
+/*
+C/C++ on linux/aarch64 (39-bit VMA)
+0000 0010 00 - 0100 0000 00: main binary
+0100 0000 00 - 0800 0000 00: -
+0800 0000 00 - 2000 0000 00: shadow memory
+2000 0000 00 - 3100 0000 00: -
+3100 0000 00 - 3400 0000 00: metainfo
+3400 0000 00 - 5500 0000 00: -
+5500 0000 00 - 5600 0000 00: main binary (PIE)
+5600 0000 00 - 6000 0000 00: -
+6000 0000 00 - 6200 0000 00: traces
+6200 0000 00 - 7d00 0000 00: -
+7c00 0000 00 - 7d00 0000 00: heap
+7d00 0000 00 - 7fff ffff ff: modules and main thread stack
+*/
+struct MappingAarch64_39 {
+  static const uptr kLoAppMemBeg   = 0x0000001000ull;
+  static const uptr kLoAppMemEnd   = 0x0100000000ull;
+  static const uptr kShadowBeg     = 0x0800000000ull;
+  static const uptr kShadowEnd     = 0x2000000000ull;
+  static const uptr kMetaShadowBeg = 0x3100000000ull;
+  static const uptr kMetaShadowEnd = 0x3400000000ull;
+  static const uptr kMidAppMemBeg  = 0x5500000000ull;
+  static const uptr kMidAppMemEnd  = 0x5600000000ull;
+  static const uptr kTraceMemBeg   = 0x6000000000ull;
+  static const uptr kTraceMemEnd   = 0x6200000000ull;
+  static const uptr kHeapMemBeg    = 0x7c00000000ull;
+  static const uptr kHeapMemEnd    = 0x7d00000000ull;
+  static const uptr kHiAppMemBeg   = 0x7e00000000ull;
+  static const uptr kHiAppMemEnd   = 0x7fffffffffull;
+  static const uptr kShadowMsk = 0x7800000000ull;
+  static const uptr kShadowXor = 0x0200000000ull;
+  static const uptr kShadowAdd = 0x0000000000ull;
+  static const uptr kVdsoBeg       = 0x7f00000000ull;
+};
+
+/*
+C/C++ on linux/aarch64 (42-bit VMA)
+00000 0010 00 - 01000 0000 00: main binary
+01000 0000 00 - 10000 0000 00: -
+10000 0000 00 - 20000 0000 00: shadow memory
+20000 0000 00 - 26000 0000 00: -
+26000 0000 00 - 28000 0000 00: metainfo
+28000 0000 00 - 2aa00 0000 00: -
+2aa00 0000 00 - 2ab00 0000 00: main binary (PIE)
+2ab00 0000 00 - 36200 0000 00: -
+36200 0000 00 - 36240 0000 00: traces
+36240 0000 00 - 3e000 0000 00: -
+3e000 0000 00 - 3f000 0000 00: heap
+3f000 0000 00 - 3ffff ffff ff: modules and main thread stack
+*/
+struct MappingAarch64_42 {
+  static const uptr kBroken = kBrokenReverseMapping;
+  static const uptr kLoAppMemBeg   = 0x00000001000ull;
+  static const uptr kLoAppMemEnd   = 0x01000000000ull;
+  static const uptr kShadowBeg     = 0x10000000000ull;
+  static const uptr kShadowEnd     = 0x20000000000ull;
+  static const uptr kMetaShadowBeg = 0x26000000000ull;
+  static const uptr kMetaShadowEnd = 0x28000000000ull;
+  static const uptr kMidAppMemBeg  = 0x2aa00000000ull;
+  static const uptr kMidAppMemEnd  = 0x2ab00000000ull;
+  static const uptr kTraceMemBeg   = 0x36200000000ull;
+  static const uptr kTraceMemEnd   = 0x36400000000ull;
+  static const uptr kHeapMemBeg    = 0x3e000000000ull;
+  static const uptr kHeapMemEnd    = 0x3f000000000ull;
+  static const uptr kHiAppMemBeg   = 0x3f000000000ull;
+  static const uptr kHiAppMemEnd   = 0x3ffffffffffull;
+  static const uptr kShadowMsk = 0x3c000000000ull;
+  static const uptr kShadowXor = 0x04000000000ull;
+  static const uptr kShadowAdd = 0x00000000000ull;
+  static const uptr kVdsoBeg       = 0x37f00000000ull;
+};
+
+struct MappingAarch64_48 {
+  static const uptr kLoAppMemBeg   = 0x0000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x0000200000000ull;
+  static const uptr kShadowBeg     = 0x0002000000000ull;
+  static const uptr kShadowEnd     = 0x0004000000000ull;
+  static const uptr kMetaShadowBeg = 0x0005000000000ull;
+  static const uptr kMetaShadowEnd = 0x0006000000000ull;
+  static const uptr kMidAppMemBeg  = 0x0aaaa00000000ull;
+  static const uptr kMidAppMemEnd  = 0x0aaaf00000000ull;
+  static const uptr kTraceMemBeg   = 0x0f06000000000ull;
+  static const uptr kTraceMemEnd   = 0x0f06200000000ull;
+  static const uptr kHeapMemBeg    = 0x0ffff00000000ull;
+  static const uptr kHeapMemEnd    = 0x0ffff00000000ull;
+  static const uptr kHiAppMemBeg   = 0x0ffff00000000ull;
+  static const uptr kHiAppMemEnd   = 0x1000000000000ull;
+  static const uptr kShadowMsk = 0x0fff800000000ull;
+  static const uptr kShadowXor = 0x0000800000000ull;
+  static const uptr kShadowAdd = 0x0000000000000ull;
+  static const uptr kVdsoBeg       = 0xffff000000000ull;
+};
+
+/*
+C/C++ on linux/powerpc64 (44-bit VMA)
+0000 0000 0100 - 0001 0000 0000: main binary
+0001 0000 0000 - 0001 0000 0000: -
+0001 0000 0000 - 0b00 0000 0000: shadow
+0b00 0000 0000 - 0b00 0000 0000: -
+0b00 0000 0000 - 0d00 0000 0000: metainfo (memory blocks and sync objects)
+0d00 0000 0000 - 0d00 0000 0000: -
+0d00 0000 0000 - 0f00 0000 0000: traces
+0f00 0000 0000 - 0f00 0000 0000: -
+0f00 0000 0000 - 0f50 0000 0000: heap
+0f50 0000 0000 - 0f60 0000 0000: -
+0f60 0000 0000 - 1000 0000 0000: modules and main thread stack
+*/
+struct MappingPPC64_44 {
+  static const uptr kBroken =
+      kBrokenMapping | kBrokenReverseMapping | kBrokenLinearity;
+  static const uptr kMetaShadowBeg = 0x0b0000000000ull;
+  static const uptr kMetaShadowEnd = 0x0d0000000000ull;
+  static const uptr kTraceMemBeg   = 0x0d0000000000ull;
+  static const uptr kTraceMemEnd   = 0x0f0000000000ull;
+  static const uptr kShadowBeg     = 0x000100000000ull;
+  static const uptr kShadowEnd     = 0x0b0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000000100ull;
+  static const uptr kLoAppMemEnd   = 0x000100000000ull;
+  static const uptr kHeapMemBeg    = 0x0f0000000000ull;
+  static const uptr kHeapMemEnd    = 0x0f5000000000ull;
+  static const uptr kHiAppMemBeg   = 0x0f6000000000ull;
+  static const uptr kHiAppMemEnd   = 0x100000000000ull; // 44 bits
+  static const uptr kShadowMsk = 0x0f0000000000ull;
+  static const uptr kShadowXor = 0x002100000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg       = 0x3c0000000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+};
+
+/*
+C/C++ on linux/powerpc64 (46-bit VMA)
+0000 0000 1000 - 0100 0000 0000: main binary
+0100 0000 0000 - 0200 0000 0000: -
+0100 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
+2000 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2200 0000 0000: traces
+2200 0000 0000 - 3d00 0000 0000: -
+3d00 0000 0000 - 3e00 0000 0000: heap
+3e00 0000 0000 - 3e80 0000 0000: -
+3e80 0000 0000 - 4000 0000 0000: modules and main thread stack
+*/
+struct MappingPPC64_46 {
+  static const uptr kMetaShadowBeg = 0x100000000000ull;
+  static const uptr kMetaShadowEnd = 0x200000000000ull;
+  static const uptr kTraceMemBeg   = 0x200000000000ull;
+  static const uptr kTraceMemEnd   = 0x220000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x100000000000ull;
+  static const uptr kHeapMemBeg    = 0x3d0000000000ull;
+  static const uptr kHeapMemEnd    = 0x3e0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x010000000000ull;
+  static const uptr kHiAppMemBeg   = 0x3e8000000000ull;
+  static const uptr kHiAppMemEnd   = 0x400000000000ull; // 46 bits
+  static const uptr kShadowMsk = 0x3c0000000000ull;
+  static const uptr kShadowXor = 0x020000000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg       = 0x7800000000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+};
+
+/*
+C/C++ on linux/powerpc64 (47-bit VMA)
+0000 0000 1000 - 0100 0000 0000: main binary
+0100 0000 0000 - 0200 0000 0000: -
+0100 0000 0000 - 1000 0000 0000: shadow
+1000 0000 0000 - 1000 0000 0000: -
+1000 0000 0000 - 2000 0000 0000: metainfo (memory blocks and sync objects)
+2000 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2200 0000 0000: traces
+2200 0000 0000 - 7d00 0000 0000: -
+7d00 0000 0000 - 7e00 0000 0000: heap
+7e00 0000 0000 - 7e80 0000 0000: -
+7e80 0000 0000 - 8000 0000 0000: modules and main thread stack
+*/
+struct MappingPPC64_47 {
+  static const uptr kMetaShadowBeg = 0x100000000000ull;
+  static const uptr kMetaShadowEnd = 0x200000000000ull;
+  static const uptr kTraceMemBeg   = 0x200000000000ull;
+  static const uptr kTraceMemEnd   = 0x220000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x100000000000ull;
+  static const uptr kHeapMemBeg    = 0x7d0000000000ull;
+  static const uptr kHeapMemEnd    = 0x7e0000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x010000000000ull;
+  static const uptr kHiAppMemBeg   = 0x7e8000000000ull;
+  static const uptr kHiAppMemEnd   = 0x800000000000ull; // 47 bits
+  static const uptr kShadowMsk = 0x7c0000000000ull;
+  static const uptr kShadowXor = 0x020000000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg       = 0x7800000000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+};
+
+/*
+C/C++ on linux/s390x
+While the kernel provides a 64-bit address space, we have to restrict ourselves
+to 48 bits due to how e.g. SyncVar::GetId() works.
+0000 0000 1000 - 0e00 0000 0000: binary, modules, stacks - 14 TiB
+0e00 0000 0000 - 4000 0000 0000: -
+4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
+8000 0000 0000 - 9000 0000 0000: -
+9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
+9800 0000 0000 - a000 0000 0000: -
+a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
+b000 0000 0000 - be00 0000 0000: -
+be00 0000 0000 - c000 0000 0000: heap - 2TiB (max supported by the allocator)
+*/
+struct MappingS390x {
+  static const uptr kMetaShadowBeg = 0x900000000000ull;
+  static const uptr kMetaShadowEnd = 0x980000000000ull;
+  static const uptr kTraceMemBeg   = 0xa00000000000ull;
+  static const uptr kTraceMemEnd   = 0xb00000000000ull;
+  static const uptr kShadowBeg     = 0x400000000000ull;
+  static const uptr kShadowEnd     = 0x800000000000ull;
+  static const uptr kHeapMemBeg    = 0xbe0000000000ull;
+  static const uptr kHeapMemEnd    = 0xc00000000000ull;
+  static const uptr kLoAppMemBeg   = 0x000000001000ull;
+  static const uptr kLoAppMemEnd   = 0x0e0000000000ull;
+  static const uptr kHiAppMemBeg   = 0xc00000004000ull;
+  static const uptr kHiAppMemEnd   = 0xc00000004000ull;
+  static const uptr kShadowMsk = 0xb00000000000ull;
+  static const uptr kShadowXor = 0x100000000000ull;
+  static const uptr kShadowAdd = 0x000000000000ull;
+  static const uptr kVdsoBeg       = 0xfffffffff000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+};
+
+/* Go on linux, darwin and freebsd on x86_64
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2380 0000 0000: shadow
+2380 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
+*/
+
+struct MappingGo48 {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x200000000000ull;
+  static const uptr kShadowEnd     = 0x238000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
+/* Go on windows
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00f8 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 0100 0000 0000: -
+0100 0000 0000 - 0500 0000 0000: shadow
+0500 0000 0000 - 0700 0000 0000: traces
+0700 0000 0000 - 0770 0000 0000: metainfo (memory blocks and sync objects)
+07d0 0000 0000 - 8000 0000 0000: -
+*/
+
+struct MappingGoWindows {
+  static const uptr kMetaShadowBeg = 0x070000000000ull;
+  static const uptr kMetaShadowEnd = 0x077000000000ull;
+  static const uptr kTraceMemBeg = 0x050000000000ull;
+  static const uptr kTraceMemEnd = 0x070000000000ull;
+  static const uptr kShadowBeg     = 0x010000000000ull;
+  static const uptr kShadowEnd     = 0x050000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x010000000000ull;
+};
+
+/* Go on linux/powerpc64 (46-bit VMA)
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 2380 0000 0000: shadow
+2380 0000 0000 - 2400 0000 0000: -
+2400 0000 0000 - 3400 0000 0000: metainfo (memory blocks and sync objects)
+3400 0000 0000 - 3600 0000 0000: -
+3600 0000 0000 - 3800 0000 0000: traces
+3800 0000 0000 - 4000 0000 0000: -
+*/
+
+struct MappingGoPPC64_46 {
+  static const uptr kMetaShadowBeg = 0x240000000000ull;
+  static const uptr kMetaShadowEnd = 0x340000000000ull;
+  static const uptr kTraceMemBeg   = 0x360000000000ull;
+  static const uptr kTraceMemEnd   = 0x380000000000ull;
+  static const uptr kShadowBeg     = 0x200000000000ull;
+  static const uptr kShadowEnd     = 0x238000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
+/* Go on linux/powerpc64 (47-bit VMA)
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 3000 0000 0000: shadow
+3000 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
+*/
+
+struct MappingGoPPC64_47 {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x200000000000ull;
+  static const uptr kShadowEnd     = 0x300000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
+/* Go on linux/aarch64 (48-bit VMA) and darwin/aarch64 (47-bit VMA)
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 3000 0000 0000: shadow
+3000 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
+*/
+struct MappingGoAarch64 {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg   = 0x600000000000ull;
+  static const uptr kTraceMemEnd   = 0x620000000000ull;
+  static const uptr kShadowBeg     = 0x200000000000ull;
+  static const uptr kShadowEnd     = 0x300000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
+/*
+Go on linux/mips64 (47-bit VMA)
+0000 0000 1000 - 0000 1000 0000: executable
+0000 1000 0000 - 00c0 0000 0000: -
+00c0 0000 0000 - 00e0 0000 0000: heap
+00e0 0000 0000 - 2000 0000 0000: -
+2000 0000 0000 - 3000 0000 0000: shadow
+3000 0000 0000 - 3000 0000 0000: -
+3000 0000 0000 - 4000 0000 0000: metainfo (memory blocks and sync objects)
+4000 0000 0000 - 6000 0000 0000: -
+6000 0000 0000 - 6200 0000 0000: traces
+6200 0000 0000 - 8000 0000 0000: -
+*/
+struct MappingGoMips64_47 {
+  static const uptr kMetaShadowBeg = 0x300000000000ull;
+  static const uptr kMetaShadowEnd = 0x400000000000ull;
+  static const uptr kTraceMemBeg = 0x600000000000ull;
+  static const uptr kTraceMemEnd = 0x620000000000ull;
+  static const uptr kShadowBeg = 0x200000000000ull;
+  static const uptr kShadowEnd = 0x300000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x00e000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x200000000000ull;
+};
+
+/*
+Go on linux/s390x
+0000 0000 1000 - 1000 0000 0000: executable and heap - 16 TiB
+1000 0000 0000 - 4000 0000 0000: -
+4000 0000 0000 - 8000 0000 0000: shadow - 64TiB (4 * app)
+8000 0000 0000 - 9000 0000 0000: -
+9000 0000 0000 - 9800 0000 0000: metainfo - 8TiB (0.5 * app)
+9800 0000 0000 - a000 0000 0000: -
+a000 0000 0000 - b000 0000 0000: traces - 16TiB (max history * 128k threads)
+*/
+struct MappingGoS390x {
+  static const uptr kMetaShadowBeg = 0x900000000000ull;
+  static const uptr kMetaShadowEnd = 0x980000000000ull;
+  static const uptr kTraceMemBeg   = 0xa00000000000ull;
+  static const uptr kTraceMemEnd   = 0xb00000000000ull;
+  static const uptr kShadowBeg     = 0x400000000000ull;
+  static const uptr kShadowEnd     = 0x800000000000ull;
+  static const uptr kLoAppMemBeg = 0x000000001000ull;
+  static const uptr kLoAppMemEnd = 0x100000000000ull;
+  static const uptr kMidAppMemBeg = 0;
+  static const uptr kMidAppMemEnd = 0;
+  static const uptr kHiAppMemBeg = 0;
+  static const uptr kHiAppMemEnd = 0;
+  static const uptr kHeapMemBeg = 0;
+  static const uptr kHeapMemEnd = 0;
+  static const uptr kVdsoBeg = 0;
+  static const uptr kShadowMsk = 0;
+  static const uptr kShadowXor = 0;
+  static const uptr kShadowAdd = 0x400000000000ull;
+};
+
+extern uptr vmaSize;
+
+template <typename Func, typename Arg>
+ALWAYS_INLINE auto SelectMapping(Arg arg) {
+#if SANITIZER_GO
+#  if defined(__powerpc64__)
+  switch (vmaSize) {
+    case 46:
+      return Func::template Apply<MappingGoPPC64_46>(arg);
+    case 47:
+      return Func::template Apply<MappingGoPPC64_47>(arg);
+  }
+#  elif defined(__mips64)
+  return Func::template Apply<MappingGoMips64_47>(arg);
+#  elif defined(__s390x__)
+  return Func::template Apply<MappingGoS390x>(arg);
+#  elif defined(__aarch64__)
+  return Func::template Apply<MappingGoAarch64>(arg);
+#  elif SANITIZER_WINDOWS
+  return Func::template Apply<MappingGoWindows>(arg);
+#  else
+  return Func::template Apply<MappingGo48>(arg);
+#  endif
+#else  // SANITIZER_GO
+#  if defined(__x86_64__) || SANITIZER_IOSSIM || SANITIZER_MAC && !SANITIZER_IOS
+  return Func::template Apply<Mapping48AddressSpace>(arg);
+#  elif defined(__aarch64__) && defined(__APPLE__)
+  return Func::template Apply<MappingAppleAarch64>(arg);
+#  elif defined(__aarch64__) && !defined(__APPLE__)
+  switch (vmaSize) {
+    case 39:
+      return Func::template Apply<MappingAarch64_39>(arg);
+    case 42:
+      return Func::template Apply<MappingAarch64_42>(arg);
+    case 48:
+      return Func::template Apply<MappingAarch64_48>(arg);
+  }
+#  elif defined(__powerpc64__)
+  switch (vmaSize) {
+    case 44:
+      return Func::template Apply<MappingPPC64_44>(arg);
+    case 46:
+      return Func::template Apply<MappingPPC64_46>(arg);
+    case 47:
+      return Func::template Apply<MappingPPC64_47>(arg);
+  }
+#  elif defined(__mips64)
+  return Func::template Apply<MappingMips64_40>(arg);
+#  elif defined(__s390x__)
+  return Func::template Apply<MappingS390x>(arg);
+#  else
+#    error "unsupported platform"
+#  endif
+#endif
+  Die();
+}
+
+template <typename Func>
+void ForEachMapping() {
+  Func::template Apply<Mapping48AddressSpace>();
+  Func::template Apply<MappingMips64_40>();
+  Func::template Apply<MappingAppleAarch64>();
+  Func::template Apply<MappingAarch64_39>();
+  Func::template Apply<MappingAarch64_42>();
+  Func::template Apply<MappingAarch64_48>();
+  Func::template Apply<MappingPPC64_44>();
+  Func::template Apply<MappingPPC64_46>();
+  Func::template Apply<MappingPPC64_47>();
+  Func::template Apply<MappingS390x>();
+  Func::template Apply<MappingGo48>();
+  Func::template Apply<MappingGoWindows>();
+  Func::template Apply<MappingGoPPC64_46>();
+  Func::template Apply<MappingGoPPC64_47>();
+  Func::template Apply<MappingGoAarch64>();
+  Func::template Apply<MappingGoMips64_47>();
+  Func::template Apply<MappingGoS390x>();
+}
+
+enum MappingType {
+  kLoAppMemBeg,
+  kLoAppMemEnd,
+  kHiAppMemBeg,
+  kHiAppMemEnd,
+  kMidAppMemBeg,
+  kMidAppMemEnd,
+  kHeapMemBeg,
+  kHeapMemEnd,
+  kShadowBeg,
+  kShadowEnd,
+  kMetaShadowBeg,
+  kMetaShadowEnd,
+  kTraceMemBeg,
+  kTraceMemEnd,
+  kVdsoBeg,
+};
+
+struct MappingField {
+  template <typename Mapping>
+  static uptr Apply(MappingType type) {
+    switch (type) {
+      case kLoAppMemBeg:
+        return Mapping::kLoAppMemBeg;
+      case kLoAppMemEnd:
+        return Mapping::kLoAppMemEnd;
+      case kMidAppMemBeg:
+        return Mapping::kMidAppMemBeg;
+      case kMidAppMemEnd:
+        return Mapping::kMidAppMemEnd;
+      case kHiAppMemBeg:
+        return Mapping::kHiAppMemBeg;
+      case kHiAppMemEnd:
+        return Mapping::kHiAppMemEnd;
+      case kHeapMemBeg:
+        return Mapping::kHeapMemBeg;
+      case kHeapMemEnd:
+        return Mapping::kHeapMemEnd;
+      case kVdsoBeg:
+        return Mapping::kVdsoBeg;
+      case kShadowBeg:
+        return Mapping::kShadowBeg;
+      case kShadowEnd:
+        return Mapping::kShadowEnd;
+      case kMetaShadowBeg:
+        return Mapping::kMetaShadowBeg;
+      case kMetaShadowEnd:
+        return Mapping::kMetaShadowEnd;
+      case kTraceMemBeg:
+        return Mapping::kTraceMemBeg;
+      case kTraceMemEnd:
+        return Mapping::kTraceMemEnd;
+    }
+    Die();
+  }
+};
+
+ALWAYS_INLINE
+uptr LoAppMemBeg(void) { return SelectMapping<MappingField>(kLoAppMemBeg); }
+ALWAYS_INLINE
+uptr LoAppMemEnd(void) { return SelectMapping<MappingField>(kLoAppMemEnd); }
+
+ALWAYS_INLINE
+uptr MidAppMemBeg(void) { return SelectMapping<MappingField>(kMidAppMemBeg); }
+ALWAYS_INLINE
+uptr MidAppMemEnd(void) { return SelectMapping<MappingField>(kMidAppMemEnd); }
+
+ALWAYS_INLINE
+uptr HeapMemBeg(void) { return SelectMapping<MappingField>(kHeapMemBeg); }
+ALWAYS_INLINE
+uptr HeapMemEnd(void) { return SelectMapping<MappingField>(kHeapMemEnd); }
+
+ALWAYS_INLINE
+uptr HiAppMemBeg(void) { return SelectMapping<MappingField>(kHiAppMemBeg); }
+ALWAYS_INLINE
+uptr HiAppMemEnd(void) { return SelectMapping<MappingField>(kHiAppMemEnd); }
+
+ALWAYS_INLINE
+uptr VdsoBeg(void) { return SelectMapping<MappingField>(kVdsoBeg); }
+
+ALWAYS_INLINE
+uptr ShadowBeg(void) { return SelectMapping<MappingField>(kShadowBeg); }
+ALWAYS_INLINE
+uptr ShadowEnd(void) { return SelectMapping<MappingField>(kShadowEnd); }
+
+ALWAYS_INLINE
+uptr MetaShadowBeg(void) { return SelectMapping<MappingField>(kMetaShadowBeg); }
+ALWAYS_INLINE
+uptr MetaShadowEnd(void) { return SelectMapping<MappingField>(kMetaShadowEnd); }
+
+ALWAYS_INLINE
+uptr TraceMemBeg(void) { return SelectMapping<MappingField>(kTraceMemBeg); }
+ALWAYS_INLINE
+uptr TraceMemEnd(void) { return SelectMapping<MappingField>(kTraceMemEnd); }
+
+struct IsAppMemImpl {
+  template <typename Mapping>
+  static bool Apply(uptr mem) {
+  return (mem >= Mapping::kHeapMemBeg && mem < Mapping::kHeapMemEnd) ||
+         (mem >= Mapping::kMidAppMemBeg && mem < Mapping::kMidAppMemEnd) ||
+         (mem >= Mapping::kLoAppMemBeg && mem < Mapping::kLoAppMemEnd) ||
+         (mem >= Mapping::kHiAppMemBeg && mem < Mapping::kHiAppMemEnd);
+  }
+};
+
+ALWAYS_INLINE
+bool IsAppMem(uptr mem) { return SelectMapping<IsAppMemImpl>(mem); }
+
+struct IsShadowMemImpl {
+  template <typename Mapping>
+  static bool Apply(uptr mem) {
+    return mem >= Mapping::kShadowBeg && mem <= Mapping::kShadowEnd;
+  }
+};
+
+ALWAYS_INLINE
+bool IsShadowMem(RawShadow *p) {
+  return SelectMapping<IsShadowMemImpl>(reinterpret_cast<uptr>(p));
+}
+
+struct IsMetaMemImpl {
+  template <typename Mapping>
+  static bool Apply(uptr mem) {
+    return mem >= Mapping::kMetaShadowBeg && mem <= Mapping::kMetaShadowEnd;
+  }
+};
+
+ALWAYS_INLINE
+bool IsMetaMem(const u32 *p) {
+  return SelectMapping<IsMetaMemImpl>(reinterpret_cast<uptr>(p));
+}
+
+struct MemToShadowImpl {
+  template <typename Mapping>
+  static uptr Apply(uptr x) {
+    DCHECK(IsAppMemImpl::Apply<Mapping>(x));
+    return (((x) & ~(Mapping::kShadowMsk | (kShadowCell - 1))) ^
+            Mapping::kShadowXor) *
+               kShadowMultiplier +
+           Mapping::kShadowAdd;
+  }
+};
+
+ALWAYS_INLINE
+RawShadow *MemToShadow(uptr x) {
+  return reinterpret_cast<RawShadow *>(SelectMapping<MemToShadowImpl>(x));
+}
+
+struct MemToMetaImpl {
+  template <typename Mapping>
+  static u32 *Apply(uptr x) {
+    DCHECK(IsAppMemImpl::Apply<Mapping>(x));
+    return (u32 *)(((((x) & ~(Mapping::kShadowMsk | (kMetaShadowCell - 1)))) /
+                    kMetaShadowCell * kMetaShadowSize) |
+                   Mapping::kMetaShadowBeg);
+  }
+};
+
+ALWAYS_INLINE
+u32 *MemToMeta(uptr x) { return SelectMapping<MemToMetaImpl>(x); }
+
+struct ShadowToMemImpl {
+  template <typename Mapping>
+  static uptr Apply(uptr sp) {
+    if (!IsShadowMemImpl::Apply<Mapping>(sp))
+      return 0;
+    // The shadow mapping is non-linear and we've lost some bits, so we don't
+    // have an easy way to restore the original app address. But the mapping is
+    // a bijection, so we try to restore the address as belonging to
+    // low/mid/high range consecutively and see if shadow->app->shadow mapping
+    // gives us the same address.
+    uptr p =
+        ((sp - Mapping::kShadowAdd) / kShadowMultiplier) ^ Mapping::kShadowXor;
+    if (p >= Mapping::kLoAppMemBeg && p < Mapping::kLoAppMemEnd &&
+        MemToShadowImpl::Apply<Mapping>(p) == sp)
+      return p;
+    if (Mapping::kMidAppMemBeg) {
+      uptr p_mid = p + (Mapping::kMidAppMemBeg & Mapping::kShadowMsk);
+      if (p_mid >= Mapping::kMidAppMemBeg && p_mid < Mapping::kMidAppMemEnd &&
+          MemToShadowImpl::Apply<Mapping>(p_mid) == sp)
+        return p_mid;
+    }
+    return p | Mapping::kShadowMsk;
+  }
+};
+
+ALWAYS_INLINE
+uptr ShadowToMem(RawShadow *s) {
+  return SelectMapping<ShadowToMemImpl>(reinterpret_cast<uptr>(s));
+}
+
+// Compresses addr to kCompressedAddrBits stored in least significant bits.
+ALWAYS_INLINE uptr CompressAddr(uptr addr) {
+  return addr & ((1ull << kCompressedAddrBits) - 1);
+}
+
+struct RestoreAddrImpl {
+  typedef uptr Result;
+  template <typename Mapping>
+  static Result Apply(uptr addr) {
+    // To restore the address we go over all app memory ranges and check if top
+    // 3 bits of the compressed addr match that of the app range. If yes, we
+    // assume that the compressed address come from that range and restore the
+    // missing top bits to match the app range address.
+    const uptr ranges[] = {
+        Mapping::kLoAppMemBeg,  Mapping::kLoAppMemEnd, Mapping::kMidAppMemBeg,
+        Mapping::kMidAppMemEnd, Mapping::kHiAppMemBeg, Mapping::kHiAppMemEnd,
+        Mapping::kHeapMemBeg,   Mapping::kHeapMemEnd,
+    };
+    const uptr indicator = 0x0e0000000000ull;
+    const uptr ind_lsb = 1ull << LeastSignificantSetBitIndex(indicator);
+    for (uptr i = 0; i < ARRAY_SIZE(ranges); i += 2) {
+      uptr beg = ranges[i];
+      uptr end = ranges[i + 1];
+      if (beg == end)
+        continue;
+      for (uptr p = beg; p < end; p = RoundDown(p + ind_lsb, ind_lsb)) {
+        if ((addr & indicator) == (p & indicator))
+          return addr | (p & ~(ind_lsb - 1));
+      }
+    }
+    Printf("ThreadSanitizer: failed to restore address 0x%zx\n", addr);
+    Die();
+  }
+};
+
+// Restores compressed addr from kCompressedAddrBits to full representation.
+// This is called only during reporting and is not performance-critical.
+inline uptr RestoreAddr(uptr addr) {
+  return SelectMapping<RestoreAddrImpl>(addr);
+}
+
+// The additional page is to catch shadow stack overflow as paging fault.
+// Windows wants 64K alignment for mmaps.
+const uptr kTotalTraceSize = (kTraceSize * sizeof(Event) + sizeof(Trace)
+    + (64 << 10) + (64 << 10) - 1) & ~((64 << 10) - 1);
+
+struct GetThreadTraceImpl {
+  template <typename Mapping>
+  static uptr Apply(uptr tid) {
+    uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize;
+    DCHECK_LT(p, Mapping::kTraceMemEnd);
+    return p;
+  }
+};
+
+ALWAYS_INLINE
+uptr GetThreadTrace(int tid) { return SelectMapping<GetThreadTraceImpl>(tid); }
+
+struct GetThreadTraceHeaderImpl {
+  template <typename Mapping>
+  static uptr Apply(uptr tid) {
+    uptr p = Mapping::kTraceMemBeg + tid * kTotalTraceSize +
+             kTraceSize * sizeof(Event);
+    DCHECK_LT(p, Mapping::kTraceMemEnd);
+    return p;
+  }
+};
+
+ALWAYS_INLINE
+uptr GetThreadTraceHeader(int tid) {
+  return SelectMapping<GetThreadTraceHeaderImpl>(tid);
+}
+
+void InitializePlatform();
+void InitializePlatformEarly();
+void CheckAndProtect();
+void InitializeShadowMemoryPlatform();
+void FlushShadowMemory();
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns);
+int ExtractResolvFDs(void *state, int *fds, int nfd);
+int ExtractRecvmsgFDs(void *msg, int *fds, int nfd);
+uptr ExtractLongJmpSp(uptr *env);
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size);
+
+int call_pthread_cancel_with_cleanup(int (*fn)(void *arg),
+                                     void (*cleanup)(void *arg), void *arg);
+
+void DestroyThreadState();
+void PlatformCleanUpThreadState(ThreadState *thr);
+
+}  // namespace __tsan
+
+#endif  // TSAN_PLATFORM_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform_linux.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_platform_linux.cpp
new file mode 100644
index 0000000000000..73ec14892d28f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform_linux.cpp
@@ -0,0 +1,545 @@
+//===-- tsan_platform_linux.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Linux- and BSD-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_linux.h"
+#include "sanitizer_common/sanitizer_platform_limits_netbsd.h"
+#include "sanitizer_common/sanitizer_platform_limits_posix.h"
+#include "sanitizer_common/sanitizer_posix.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_stoptheworld.h"
+#include "tsan_flags.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#if SANITIZER_LINUX
+#include <sys/personality.h>
+#include <setjmp.h>
+#endif
+#include <sys/syscall.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sched.h>
+#include <dlfcn.h>
+#if SANITIZER_LINUX
+#define __need_res_state
+#include <resolv.h>
+#endif
+
+#ifdef sa_handler
+# undef sa_handler
+#endif
+
+#ifdef sa_sigaction
+# undef sa_sigaction
+#endif
+
+#if SANITIZER_FREEBSD
+extern "C" void *__libc_stack_end;
+void *__libc_stack_end = 0;
+#endif
+
+#if SANITIZER_LINUX && defined(__aarch64__) && !SANITIZER_GO
+# define INIT_LONGJMP_XOR_KEY 1
+#else
+# define INIT_LONGJMP_XOR_KEY 0
+#endif
+
+#if INIT_LONGJMP_XOR_KEY
+#include "interception/interception.h"
+// Must be declared outside of other namespaces.
+DECLARE_REAL(int, _setjmp, void *env)
+#endif
+
+namespace __tsan {
+
+#if INIT_LONGJMP_XOR_KEY
+static void InitializeLongjmpXorKey();
+static uptr longjmp_xor_key;
+#endif
+
+// Runtime detected VMA size.
+uptr vmaSize;
+
+enum {
+  MemTotal,
+  MemShadow,
+  MemMeta,
+  MemFile,
+  MemMmap,
+  MemTrace,
+  MemHeap,
+  MemOther,
+  MemCount,
+};
+
+void FillProfileCallback(uptr p, uptr rss, bool file, uptr *mem) {
+  mem[MemTotal] += rss;
+  if (p >= ShadowBeg() && p < ShadowEnd())
+    mem[MemShadow] += rss;
+  else if (p >= MetaShadowBeg() && p < MetaShadowEnd())
+    mem[MemMeta] += rss;
+  else if ((p >= LoAppMemBeg() && p < LoAppMemEnd()) ||
+           (p >= MidAppMemBeg() && p < MidAppMemEnd()) ||
+           (p >= HiAppMemBeg() && p < HiAppMemEnd()))
+    mem[file ? MemFile : MemMmap] += rss;
+  else if (p >= HeapMemBeg() && p < HeapMemEnd())
+    mem[MemHeap] += rss;
+  else if (p >= TraceMemBeg() && p < TraceMemEnd())
+    mem[MemTrace] += rss;
+  else
+    mem[MemOther] += rss;
+}
+
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
+  uptr mem[MemCount];
+  internal_memset(mem, 0, sizeof(mem));
+  GetMemoryProfile(FillProfileCallback, mem);
+  auto meta = ctx->metamap.GetMemoryStats();
+  StackDepotStats stacks = StackDepotGetStats();
+  uptr nthread, nlive;
+  ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
+  uptr internal_stats[AllocatorStatCount];
+  internal_allocator()->GetStats(internal_stats);
+  // All these are allocated from the common mmap region.
+  mem[MemMmap] -= meta.mem_block + meta.sync_obj + stacks.allocated +
+                  internal_stats[AllocatorStatMapped];
+  if (s64(mem[MemMmap]) < 0)
+    mem[MemMmap] = 0;
+  internal_snprintf(
+      buf, buf_size,
+      "%llus: RSS %zd MB: shadow:%zd meta:%zd file:%zd mmap:%zd"
+      " trace:%zd heap:%zd other:%zd intalloc:%zd memblocks:%zd syncobj:%zu"
+      " stacks=%zd[%zd] nthr=%zd/%zd\n",
+      uptime_ns / (1000 * 1000 * 1000), mem[MemTotal] >> 20,
+      mem[MemShadow] >> 20, mem[MemMeta] >> 20, mem[MemFile] >> 20,
+      mem[MemMmap] >> 20, mem[MemTrace] >> 20, mem[MemHeap] >> 20,
+      mem[MemOther] >> 20, internal_stats[AllocatorStatMapped] >> 20,
+      meta.mem_block >> 20, meta.sync_obj >> 20, stacks.allocated >> 20,
+      stacks.n_uniq_ids, nlive, nthread);
+}
+
+#  if SANITIZER_LINUX
+void FlushShadowMemoryCallback(
+    const SuspendedThreadsList &suspended_threads_list,
+    void *argument) {
+  ReleaseMemoryPagesToOS(ShadowBeg(), ShadowEnd());
+}
+#endif
+
+void FlushShadowMemory() {
+#if SANITIZER_LINUX
+  StopTheWorld(FlushShadowMemoryCallback, 0);
+#endif
+}
+
+#if !SANITIZER_GO
+// Mark shadow for .rodata sections with the special kShadowRodata marker.
+// Accesses to .rodata can't race, so this saves time, memory and trace space.
+static void MapRodata() {
+  // First create temp file.
+  const char *tmpdir = GetEnv("TMPDIR");
+  if (tmpdir == 0)
+    tmpdir = GetEnv("TEST_TMPDIR");
+#ifdef P_tmpdir
+  if (tmpdir == 0)
+    tmpdir = P_tmpdir;
+#endif
+  if (tmpdir == 0)
+    return;
+  char name[256];
+  internal_snprintf(name, sizeof(name), "%s/tsan.rodata.%d",
+                    tmpdir, (int)internal_getpid());
+  uptr openrv = internal_open(name, O_RDWR | O_CREAT | O_EXCL, 0600);
+  if (internal_iserror(openrv))
+    return;
+  internal_unlink(name);  // Unlink it now, so that we can reuse the buffer.
+  fd_t fd = openrv;
+  // Fill the file with kShadowRodata.
+  const uptr kMarkerSize = 512 * 1024 / sizeof(RawShadow);
+  InternalMmapVector<RawShadow> marker(kMarkerSize);
+  // volatile to prevent insertion of memset
+  for (volatile RawShadow *p = marker.data(); p < marker.data() + kMarkerSize;
+       p++)
+    *p = kShadowRodata;
+  internal_write(fd, marker.data(), marker.size() * sizeof(RawShadow));
+  // Map the file into memory.
+  uptr page = internal_mmap(0, GetPageSizeCached(), PROT_READ | PROT_WRITE,
+                            MAP_PRIVATE | MAP_ANONYMOUS, fd, 0);
+  if (internal_iserror(page)) {
+    internal_close(fd);
+    return;
+  }
+  // Map the file into shadow of .rodata sections.
+  MemoryMappingLayout proc_maps(/*cache_enabled*/true);
+  // Reusing the buffer 'name'.
+  MemoryMappedSegment segment(name, ARRAY_SIZE(name));
+  while (proc_maps.Next(&segment)) {
+    if (segment.filename[0] != 0 && segment.filename[0] != '[' &&
+        segment.IsReadable() && segment.IsExecutable() &&
+        !segment.IsWritable() && IsAppMem(segment.start)) {
+      // Assume it's .rodata
+      char *shadow_start = (char *)MemToShadow(segment.start);
+      char *shadow_end = (char *)MemToShadow(segment.end);
+      for (char *p = shadow_start; p < shadow_end;
+           p += marker.size() * sizeof(RawShadow)) {
+        internal_mmap(
+            p, Min<uptr>(marker.size() * sizeof(RawShadow), shadow_end - p),
+            PROT_READ, MAP_PRIVATE | MAP_FIXED, fd, 0);
+      }
+    }
+  }
+  internal_close(fd);
+}
+
+void InitializeShadowMemoryPlatform() {
+  MapRodata();
+}
+
+#endif  // #if !SANITIZER_GO
+
+void InitializePlatformEarly() {
+  vmaSize =
+    (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1);
+#if defined(__aarch64__)
+# if !SANITIZER_GO
+  if (vmaSize != 39 && vmaSize != 42 && vmaSize != 48) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 39, 42 and 48\n", vmaSize);
+    Die();
+  }
+#else
+  if (vmaSize != 48) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 48\n", vmaSize);
+    Die();
+  }
+#endif
+#elif defined(__powerpc64__)
+# if !SANITIZER_GO
+  if (vmaSize != 44 && vmaSize != 46 && vmaSize != 47) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 44, 46, and 47\n", vmaSize);
+    Die();
+  }
+# else
+  if (vmaSize != 46 && vmaSize != 47) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 46, and 47\n", vmaSize);
+    Die();
+  }
+# endif
+#elif defined(__mips64)
+# if !SANITIZER_GO
+  if (vmaSize != 40) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 40\n", vmaSize);
+    Die();
+  }
+# else
+  if (vmaSize != 47) {
+    Printf("FATAL: ThreadSanitizer: unsupported VMA range\n");
+    Printf("FATAL: Found %zd - Supported 47\n", vmaSize);
+    Die();
+  }
+# endif
+#endif
+}
+
+void InitializePlatform() {
+  DisableCoreDumperIfNecessary();
+
+  // Go maps shadow memory lazily and works fine with limited address space.
+  // Unlimited stack is not a problem as well, because the executable
+  // is not compiled with -pie.
+#if !SANITIZER_GO
+  {
+    bool reexec = false;
+    // TSan doesn't play well with unlimited stack size (as stack
+    // overlaps with shadow memory). If we detect unlimited stack size,
+    // we re-exec the program with limited stack size as a best effort.
+    if (StackSizeIsUnlimited()) {
+      const uptr kMaxStackSize = 32 * 1024 * 1024;
+      VReport(1, "Program is run with unlimited stack size, which wouldn't "
+                 "work with ThreadSanitizer.\n"
+                 "Re-execing with stack size limited to %zd bytes.\n",
+              kMaxStackSize);
+      SetStackSizeLimitInBytes(kMaxStackSize);
+      reexec = true;
+    }
+
+    if (!AddressSpaceIsUnlimited()) {
+      Report("WARNING: Program is run with limited virtual address space,"
+             " which wouldn't work with ThreadSanitizer.\n");
+      Report("Re-execing with unlimited virtual address space.\n");
+      SetAddressSpaceUnlimited();
+      reexec = true;
+    }
+#if SANITIZER_LINUX && defined(__aarch64__)
+    // After patch "arm64: mm: support ARCH_MMAP_RND_BITS." is introduced in
+    // linux kernel, the random gap between stack and mapped area is increased
+    // from 128M to 36G on 39-bit aarch64. As it is almost impossible to cover
+    // this big range, we should disable randomized virtual space on aarch64.
+    int old_personality = personality(0xffffffff);
+    if (old_personality != -1 && (old_personality & ADDR_NO_RANDOMIZE) == 0) {
+      VReport(1, "WARNING: Program is run with randomized virtual address "
+              "space, which wouldn't work with ThreadSanitizer.\n"
+              "Re-execing with fixed virtual address space.\n");
+      CHECK_NE(personality(old_personality | ADDR_NO_RANDOMIZE), -1);
+      reexec = true;
+    }
+    // Initialize the xor key used in {sig}{set,long}jump.
+    InitializeLongjmpXorKey();
+#endif
+    if (reexec)
+      ReExec();
+  }
+
+  CheckAndProtect();
+  InitTlsSize();
+#endif  // !SANITIZER_GO
+}
+
+#if !SANITIZER_GO
+// Extract file descriptors passed to glibc internal __res_iclose function.
+// This is required to properly "close" the fds, because we do not see internal
+// closes within glibc. The code is a pure hack.
+int ExtractResolvFDs(void *state, int *fds, int nfd) {
+#if SANITIZER_LINUX && !SANITIZER_ANDROID
+  int cnt = 0;
+  struct __res_state *statp = (struct __res_state*)state;
+  for (int i = 0; i < MAXNS && cnt < nfd; i++) {
+    if (statp->_u._ext.nsaddrs[i] && statp->_u._ext.nssocks[i] != -1)
+      fds[cnt++] = statp->_u._ext.nssocks[i];
+  }
+  return cnt;
+#else
+  return 0;
+#endif
+}
+
+// Extract file descriptors passed via UNIX domain sockets.
+// This is required to properly handle "open" of these fds.
+// see 'man recvmsg' and 'man 3 cmsg'.
+int ExtractRecvmsgFDs(void *msgp, int *fds, int nfd) {
+  int res = 0;
+  msghdr *msg = (msghdr*)msgp;
+  struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+  for (; cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+    if (cmsg->cmsg_level != SOL_SOCKET || cmsg->cmsg_type != SCM_RIGHTS)
+      continue;
+    int n = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(fds[0]);
+    for (int i = 0; i < n; i++) {
+      fds[res++] = ((int*)CMSG_DATA(cmsg))[i];
+      if (res == nfd)
+        return res;
+    }
+  }
+  return res;
+}
+
+// Reverse operation of libc stack pointer mangling
+static uptr UnmangleLongJmpSp(uptr mangled_sp) {
+#if defined(__x86_64__)
+# if SANITIZER_LINUX
+  // Reverse of:
+  //   xor  %fs:0x30, %rsi
+  //   rol  $0x11, %rsi
+  uptr sp;
+  asm("ror  $0x11,     %0 \n"
+      "xor  %%fs:0x30, %0 \n"
+      : "=r" (sp)
+      : "0" (mangled_sp));
+  return sp;
+# else
+  return mangled_sp;
+# endif
+#elif defined(__aarch64__)
+# if SANITIZER_LINUX
+  return mangled_sp ^ longjmp_xor_key;
+# else
+  return mangled_sp;
+# endif
+#elif defined(__powerpc64__)
+  // Reverse of:
+  //   ld   r4, -28696(r13)
+  //   xor  r4, r3, r4
+  uptr xor_key;
+  asm("ld  %0, -28696(%%r13)" : "=r" (xor_key));
+  return mangled_sp ^ xor_key;
+#elif defined(__mips__)
+  return mangled_sp;
+#elif defined(__s390x__)
+  // tcbhead_t.stack_guard
+  uptr xor_key = ((uptr *)__builtin_thread_pointer())[5];
+  return mangled_sp ^ xor_key;
+#else
+  #error "Unknown platform"
+#endif
+}
+
+#if SANITIZER_NETBSD
+# ifdef __x86_64__
+#  define LONG_JMP_SP_ENV_SLOT 6
+# else
+#  error unsupported
+# endif
+#elif defined(__powerpc__)
+# define LONG_JMP_SP_ENV_SLOT 0
+#elif SANITIZER_FREEBSD
+# define LONG_JMP_SP_ENV_SLOT 2
+#elif SANITIZER_LINUX
+# ifdef __aarch64__
+#  define LONG_JMP_SP_ENV_SLOT 13
+# elif defined(__mips64)
+#  define LONG_JMP_SP_ENV_SLOT 1
+# elif defined(__s390x__)
+#  define LONG_JMP_SP_ENV_SLOT 9
+# else
+#  define LONG_JMP_SP_ENV_SLOT 6
+# endif
+#endif
+
+uptr ExtractLongJmpSp(uptr *env) {
+  uptr mangled_sp = env[LONG_JMP_SP_ENV_SLOT];
+  return UnmangleLongJmpSp(mangled_sp);
+}
+
+#if INIT_LONGJMP_XOR_KEY
+// GLIBC mangles the function pointers in jmp_buf (used in {set,long}*jmp
+// functions) by XORing them with a random key.  For AArch64 it is a global
+// variable rather than a TCB one (as for x86_64/powerpc).  We obtain the key by
+// issuing a setjmp and XORing the SP pointer values to derive the key.
+static void InitializeLongjmpXorKey() {
+  // 1. Call REAL(setjmp), which stores the mangled SP in env.
+  jmp_buf env;
+  REAL(_setjmp)(env);
+
+  // 2. Retrieve vanilla/mangled SP.
+  uptr sp;
+  asm("mov  %0, sp" : "=r" (sp));
+  uptr mangled_sp = ((uptr *)&env)[LONG_JMP_SP_ENV_SLOT];
+
+  // 3. xor SPs to obtain key.
+  longjmp_xor_key = mangled_sp ^ sp;
+}
+#endif
+
+extern "C" void __tsan_tls_initialization() {}
+
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
+  // Check that the thr object is in tls;
+  const uptr thr_beg = (uptr)thr;
+  const uptr thr_end = (uptr)thr + sizeof(*thr);
+  CHECK_GE(thr_beg, tls_addr);
+  CHECK_LE(thr_beg, tls_addr + tls_size);
+  CHECK_GE(thr_end, tls_addr);
+  CHECK_LE(thr_end, tls_addr + tls_size);
+  // Since the thr object is huge, skip it.
+  const uptr pc = StackTrace::GetNextInstructionPc(
+      reinterpret_cast<uptr>(__tsan_tls_initialization));
+  MemoryRangeImitateWrite(thr, pc, tls_addr, thr_beg - tls_addr);
+  MemoryRangeImitateWrite(thr, pc, thr_end, tls_addr + tls_size - thr_end);
+}
+
+// Note: this function runs with async signals enabled,
+// so it must not touch any tsan state.
+int call_pthread_cancel_with_cleanup(int (*fn)(void *arg),
+                                     void (*cleanup)(void *arg), void *arg) {
+  // pthread_cleanup_push/pop are hardcore macros mess.
+  // We can't intercept nor call them w/o including pthread.h.
+  int res;
+  pthread_cleanup_push(cleanup, arg);
+  res = fn(arg);
+  pthread_cleanup_pop(0);
+  return res;
+}
+#endif  // !SANITIZER_GO
+
+#if !SANITIZER_GO
+void ReplaceSystemMalloc() { }
+#endif
+
+#if !SANITIZER_GO
+#if SANITIZER_ANDROID
+// On Android, one thread can call intercepted functions after
+// DestroyThreadState(), so add a fake thread state for "dead" threads.
+static ThreadState *dead_thread_state = nullptr;
+
+ThreadState *cur_thread() {
+  ThreadState* thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
+  if (thr == nullptr) {
+    __sanitizer_sigset_t emptyset;
+    internal_sigfillset(&emptyset);
+    __sanitizer_sigset_t oldset;
+    CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &emptyset, &oldset));
+    thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
+    if (thr == nullptr) {
+      thr = reinterpret_cast<ThreadState*>(MmapOrDie(sizeof(ThreadState),
+                                                     "ThreadState"));
+      *get_android_tls_ptr() = reinterpret_cast<uptr>(thr);
+      if (dead_thread_state == nullptr) {
+        dead_thread_state = reinterpret_cast<ThreadState*>(
+            MmapOrDie(sizeof(ThreadState), "ThreadState"));
+        dead_thread_state->fast_state.SetIgnoreBit();
+        dead_thread_state->ignore_interceptors = 1;
+        dead_thread_state->is_dead = true;
+        *const_cast<u32*>(&dead_thread_state->tid) = -1;
+        CHECK_EQ(0, internal_mprotect(dead_thread_state, sizeof(ThreadState),
+                                      PROT_READ));
+      }
+    }
+    CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
+  }
+  return thr;
+}
+
+void set_cur_thread(ThreadState *thr) {
+  *get_android_tls_ptr() = reinterpret_cast<uptr>(thr);
+}
+
+void cur_thread_finalize() {
+  __sanitizer_sigset_t emptyset;
+  internal_sigfillset(&emptyset);
+  __sanitizer_sigset_t oldset;
+  CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &emptyset, &oldset));
+  ThreadState* thr = reinterpret_cast<ThreadState*>(*get_android_tls_ptr());
+  if (thr != dead_thread_state) {
+    *get_android_tls_ptr() = reinterpret_cast<uptr>(dead_thread_state);
+    UnmapOrDie(thr, sizeof(ThreadState));
+  }
+  CHECK_EQ(0, internal_sigprocmask(SIG_SETMASK, &oldset, nullptr));
+}
+#endif  // SANITIZER_ANDROID
+#endif  // if !SANITIZER_GO
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp
new file mode 100644
index 0000000000000..1465f9953c193
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform_mac.cpp
@@ -0,0 +1,326 @@
+//===-- tsan_platform_mac.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Mac-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_MAC
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_posix.h"
+#include "sanitizer_common/sanitizer_procmaps.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "tsan_flags.h"
+
+#include <limits.h>
+#include <mach/mach.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sched.h>
+
+namespace __tsan {
+
+#if !SANITIZER_GO
+static char main_thread_state[sizeof(ThreadState)] ALIGNED(
+    SANITIZER_CACHE_LINE_SIZE);
+static ThreadState *dead_thread_state;
+static pthread_key_t thread_state_key;
+
+// We rely on the following documented, but Darwin-specific behavior to keep the
+// reference to the ThreadState object alive in TLS:
+// pthread_key_create man page:
+//   If, after all the destructors have been called for all non-NULL values with
+//   associated destructors, there are still some non-NULL values with
+//   associated destructors, then the process is repeated.  If, after at least
+//   [PTHREAD_DESTRUCTOR_ITERATIONS] iterations of destructor calls for
+//   outstanding non-NULL values, there are still some non-NULL values with
+//   associated destructors, the implementation stops calling destructors.
+static_assert(PTHREAD_DESTRUCTOR_ITERATIONS == 4, "Small number of iterations");
+static void ThreadStateDestructor(void *thr) {
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
+}
+
+static void InitializeThreadStateStorage() {
+  int res;
+  CHECK_EQ(thread_state_key, 0);
+  res = pthread_key_create(&thread_state_key, ThreadStateDestructor);
+  CHECK_EQ(res, 0);
+  res = pthread_setspecific(thread_state_key, main_thread_state);
+  CHECK_EQ(res, 0);
+
+  auto dts = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+  dts->fast_state.SetIgnoreBit();
+  dts->ignore_interceptors = 1;
+  dts->is_dead = true;
+  const_cast<Tid &>(dts->tid) = kInvalidTid;
+  res = internal_mprotect(dts, sizeof(ThreadState), PROT_READ);  // immutable
+  CHECK_EQ(res, 0);
+  dead_thread_state = dts;
+}
+
+ThreadState *cur_thread() {
+  // Some interceptors get called before libpthread has been initialized and in
+  // these cases we must avoid calling any pthread APIs.
+  if (UNLIKELY(!thread_state_key)) {
+    return (ThreadState *)main_thread_state;
+  }
+
+  // We only reach this line after InitializeThreadStateStorage() ran, i.e,
+  // after TSan (and therefore libpthread) have been initialized.
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  if (UNLIKELY(!thr)) {
+    thr = (ThreadState *)MmapOrDie(sizeof(ThreadState), "ThreadState");
+    int res = pthread_setspecific(thread_state_key, thr);
+    CHECK_EQ(res, 0);
+  }
+  return thr;
+}
+
+void set_cur_thread(ThreadState *thr) {
+  int res = pthread_setspecific(thread_state_key, thr);
+  CHECK_EQ(res, 0);
+}
+
+void cur_thread_finalize() {
+  ThreadState *thr = (ThreadState *)pthread_getspecific(thread_state_key);
+  CHECK(thr);
+  if (thr == (ThreadState *)main_thread_state) {
+    // Calling dispatch_main() or xpc_main() actually invokes pthread_exit to
+    // exit the main thread. Let's keep the main thread's ThreadState.
+    return;
+  }
+  // Intercepted functions can still get called after cur_thread_finalize()
+  // (called from DestroyThreadState()), so put a fake thread state for "dead"
+  // threads.  An alternative solution would be to release the ThreadState
+  // object from THREAD_DESTROY (which is delivered later and on the parent
+  // thread) instead of THREAD_TERMINATE.
+  int res = pthread_setspecific(thread_state_key, dead_thread_state);
+  CHECK_EQ(res, 0);
+  UnmapOrDie(thr, sizeof(ThreadState));
+}
+#endif
+
+void FlushShadowMemory() {
+}
+
+static void RegionMemUsage(uptr start, uptr end, uptr *res, uptr *dirty) {
+  vm_address_t address = start;
+  vm_address_t end_address = end;
+  uptr resident_pages = 0;
+  uptr dirty_pages = 0;
+  while (address < end_address) {
+    vm_size_t vm_region_size;
+    mach_msg_type_number_t count = VM_REGION_EXTENDED_INFO_COUNT;
+    vm_region_extended_info_data_t vm_region_info;
+    mach_port_t object_name;
+    kern_return_t ret = vm_region_64(
+        mach_task_self(), &address, &vm_region_size, VM_REGION_EXTENDED_INFO,
+        (vm_region_info_t)&vm_region_info, &count, &object_name);
+    if (ret != KERN_SUCCESS) break;
+
+    resident_pages += vm_region_info.pages_resident;
+    dirty_pages += vm_region_info.pages_dirtied;
+
+    address += vm_region_size;
+  }
+  *res = resident_pages * GetPageSizeCached();
+  *dirty = dirty_pages * GetPageSizeCached();
+}
+
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {
+  uptr shadow_res, shadow_dirty;
+  uptr meta_res, meta_dirty;
+  uptr trace_res, trace_dirty;
+  RegionMemUsage(ShadowBeg(), ShadowEnd(), &shadow_res, &shadow_dirty);
+  RegionMemUsage(MetaShadowBeg(), MetaShadowEnd(), &meta_res, &meta_dirty);
+  RegionMemUsage(TraceMemBeg(), TraceMemEnd(), &trace_res, &trace_dirty);
+
+#if !SANITIZER_GO
+  uptr low_res, low_dirty;
+  uptr high_res, high_dirty;
+  uptr heap_res, heap_dirty;
+  RegionMemUsage(LoAppMemBeg(), LoAppMemEnd(), &low_res, &low_dirty);
+  RegionMemUsage(HiAppMemBeg(), HiAppMemEnd(), &high_res, &high_dirty);
+  RegionMemUsage(HeapMemBeg(), HeapMemEnd(), &heap_res, &heap_dirty);
+#else  // !SANITIZER_GO
+  uptr app_res, app_dirty;
+  RegionMemUsage(LoAppMemBeg(), LoAppMemEnd(), &app_res, &app_dirty);
+#endif
+
+  StackDepotStats stacks = StackDepotGetStats();
+  uptr nthread, nlive;
+  ctx->thread_registry.GetNumberOfThreads(&nthread, &nlive);
+  internal_snprintf(
+      buf, buf_size,
+      "shadow   (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+      "meta     (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+      "traces   (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+#  if !SANITIZER_GO
+      "low app  (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+      "high app (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+      "heap     (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+#  else  // !SANITIZER_GO
+      "app      (0x%016zx-0x%016zx): resident %zd kB, dirty %zd kB\n"
+#  endif
+      "stacks: %zd unique IDs, %zd kB allocated\n"
+      "threads: %zd total, %zd live\n"
+      "------------------------------\n",
+      ShadowBeg(), ShadowEnd(), shadow_res / 1024, shadow_dirty / 1024,
+      MetaShadowBeg(), MetaShadowEnd(), meta_res / 1024, meta_dirty / 1024,
+      TraceMemBeg(), TraceMemEnd(), trace_res / 1024, trace_dirty / 1024,
+#  if !SANITIZER_GO
+      LoAppMemBeg(), LoAppMemEnd(), low_res / 1024, low_dirty / 1024,
+      HiAppMemBeg(), HiAppMemEnd(), high_res / 1024, high_dirty / 1024,
+      HeapMemBeg(), HeapMemEnd(), heap_res / 1024, heap_dirty / 1024,
+#  else  // !SANITIZER_GO
+      LoAppMemBeg(), LoAppMemEnd(), app_res / 1024, app_dirty / 1024,
+#  endif
+      stacks.n_uniq_ids, stacks.allocated / 1024, nthread, nlive);
+}
+
+#  if !SANITIZER_GO
+void InitializeShadowMemoryPlatform() { }
+
+// On OS X, GCD worker threads are created without a call to pthread_create. We
+// need to properly register these threads with ThreadCreate and ThreadStart.
+// These threads don't have a parent thread, as they are created "spuriously".
+// We're using a libpthread API that notifies us about a newly created thread.
+// The `thread == pthread_self()` check indicates this is actually a worker
+// thread. If it's just a regular thread, this hook is called on the parent
+// thread.
+typedef void (*pthread_introspection_hook_t)(unsigned int event,
+                                             pthread_t thread, void *addr,
+                                             size_t size);
+extern "C" pthread_introspection_hook_t pthread_introspection_hook_install(
+    pthread_introspection_hook_t hook);
+static const uptr PTHREAD_INTROSPECTION_THREAD_CREATE = 1;
+static const uptr PTHREAD_INTROSPECTION_THREAD_TERMINATE = 3;
+static pthread_introspection_hook_t prev_pthread_introspection_hook;
+static void my_pthread_introspection_hook(unsigned int event, pthread_t thread,
+                                          void *addr, size_t size) {
+  if (event == PTHREAD_INTROSPECTION_THREAD_CREATE) {
+    if (thread == pthread_self()) {
+      // The current thread is a newly created GCD worker thread.
+      ThreadState *thr = cur_thread();
+      Processor *proc = ProcCreate();
+      ProcWire(proc, thr);
+      ThreadState *parent_thread_state = nullptr;  // No parent.
+      Tid tid = ThreadCreate(parent_thread_state, 0, (uptr)thread, true);
+      CHECK_NE(tid, kMainTid);
+      ThreadStart(thr, tid, GetTid(), ThreadType::Worker);
+    }
+  } else if (event == PTHREAD_INTROSPECTION_THREAD_TERMINATE) {
+    CHECK_EQ(thread, pthread_self());
+    ThreadState *thr = cur_thread();
+    if (thr->tctx) {
+      DestroyThreadState();
+    }
+  }
+
+  if (prev_pthread_introspection_hook != nullptr)
+    prev_pthread_introspection_hook(event, thread, addr, size);
+}
+#endif
+
+void InitializePlatformEarly() {
+#  if !SANITIZER_GO && SANITIZER_IOS
+  uptr max_vm = GetMaxUserVirtualAddress() + 1;
+  if (max_vm != HiAppMemEnd()) {
+    Printf("ThreadSanitizer: unsupported vm address limit %p, expected %p.\n",
+           (void *)max_vm, (void *)HiAppMemEnd());
+    Die();
+  }
+#endif
+}
+
+static uptr longjmp_xor_key = 0;
+
+void InitializePlatform() {
+  DisableCoreDumperIfNecessary();
+#if !SANITIZER_GO
+  CheckAndProtect();
+
+  InitializeThreadStateStorage();
+
+  prev_pthread_introspection_hook =
+      pthread_introspection_hook_install(&my_pthread_introspection_hook);
+#endif
+
+  if (GetMacosAlignedVersion() >= MacosVersion(10, 14)) {
+    // Libsystem currently uses a process-global key; this might change.
+    const unsigned kTLSLongjmpXorKeySlot = 0x7;
+    longjmp_xor_key = (uptr)pthread_getspecific(kTLSLongjmpXorKeySlot);
+  }
+}
+
+#ifdef __aarch64__
+# define LONG_JMP_SP_ENV_SLOT \
+    ((GetMacosAlignedVersion() >= MacosVersion(10, 14)) ? 12 : 13)
+#else
+# define LONG_JMP_SP_ENV_SLOT 2
+#endif
+
+uptr ExtractLongJmpSp(uptr *env) {
+  uptr mangled_sp = env[LONG_JMP_SP_ENV_SLOT];
+  uptr sp = mangled_sp ^ longjmp_xor_key;
+  sp = (uptr)ptrauth_auth_data((void *)sp, ptrauth_key_asdb,
+                               ptrauth_string_discriminator("sp"));
+  return sp;
+}
+
+#if !SANITIZER_GO
+extern "C" void __tsan_tls_initialization() {}
+
+void ImitateTlsWrite(ThreadState *thr, uptr tls_addr, uptr tls_size) {
+  const uptr pc = StackTrace::GetNextInstructionPc(
+      reinterpret_cast<uptr>(__tsan_tls_initialization));
+  // Unlike Linux, we only store a pointer to the ThreadState object in TLS;
+  // just mark the entire range as written to.
+  MemoryRangeImitateWrite(thr, pc, tls_addr, tls_size);
+}
+#endif
+
+#if !SANITIZER_GO
+// Note: this function runs with async signals enabled,
+// so it must not touch any tsan state.
+int call_pthread_cancel_with_cleanup(int (*fn)(void *arg),
+                                     void (*cleanup)(void *arg), void *arg) {
+  // pthread_cleanup_push/pop are hardcore macros mess.
+  // We can't intercept nor call them w/o including pthread.h.
+  int res;
+  pthread_cleanup_push(cleanup, arg);
+  res = fn(arg);
+  pthread_cleanup_pop(0);
+  return res;
+}
+#endif
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_MAC

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform_posix.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_platform_posix.cpp
new file mode 100644
index 0000000000000..763ac444377e0
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform_posix.cpp
@@ -0,0 +1,147 @@
+//===-- tsan_platform_posix.cpp -------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// POSIX-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_POSIX
+
+#  include <dlfcn.h>
+
+#  include "sanitizer_common/sanitizer_common.h"
+#  include "sanitizer_common/sanitizer_errno.h"
+#  include "sanitizer_common/sanitizer_libc.h"
+#  include "sanitizer_common/sanitizer_procmaps.h"
+#  include "tsan_platform.h"
+#  include "tsan_rtl.h"
+
+namespace __tsan {
+
+static const char kShadowMemoryMappingWarning[] =
+    "FATAL: %s can not madvise shadow region [%zx, %zx] with %s (errno: %d)\n";
+static const char kShadowMemoryMappingHint[] =
+    "HINT: if %s is not supported in your environment, you may set "
+    "TSAN_OPTIONS=%s=0\n";
+
+#  if !SANITIZER_GO
+static void DontDumpShadow(uptr addr, uptr size) {
+  if (common_flags()->use_madv_dontdump)
+    if (!DontDumpShadowMemory(addr, size)) {
+      Printf(kShadowMemoryMappingWarning, SanitizerToolName, addr, addr + size,
+             "MADV_DONTDUMP", errno);
+      Printf(kShadowMemoryMappingHint, "MADV_DONTDUMP", "use_madv_dontdump");
+      Die();
+    }
+}
+
+void InitializeShadowMemory() {
+  // Map memory shadow.
+  if (!MmapFixedSuperNoReserve(ShadowBeg(), ShadowEnd() - ShadowBeg(),
+                               "shadow")) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
+    Die();
+  }
+  // This memory range is used for thread stacks and large user mmaps.
+  // Frequently a thread uses only a small part of stack and similarly
+  // a program uses a small part of large mmap. On some programs
+  // we see 20% memory usage reduction without huge pages for this range.
+  DontDumpShadow(ShadowBeg(), ShadowEnd() - ShadowBeg());
+  DPrintf("memory shadow: %zx-%zx (%zuGB)\n",
+      ShadowBeg(), ShadowEnd(),
+      (ShadowEnd() - ShadowBeg()) >> 30);
+
+  // Map meta shadow.
+  const uptr meta = MetaShadowBeg();
+  const uptr meta_size = MetaShadowEnd() - meta;
+  if (!MmapFixedSuperNoReserve(meta, meta_size, "meta shadow")) {
+    Printf("FATAL: ThreadSanitizer can not mmap the shadow memory\n");
+    Printf("FATAL: Make sure to compile with -fPIE and to link with -pie.\n");
+    Die();
+  }
+  DontDumpShadow(meta, meta_size);
+  DPrintf("meta shadow: %zx-%zx (%zuGB)\n",
+      meta, meta + meta_size, meta_size >> 30);
+
+  InitializeShadowMemoryPlatform();
+
+  on_initialize = reinterpret_cast<void (*)(void)>(
+      dlsym(RTLD_DEFAULT, "__tsan_on_initialize"));
+  on_finalize =
+      reinterpret_cast<int (*)(int)>(dlsym(RTLD_DEFAULT, "__tsan_on_finalize"));
+}
+
+static bool TryProtectRange(uptr beg, uptr end) {
+  CHECK_LE(beg, end);
+  if (beg == end)
+    return true;
+  return beg == (uptr)MmapFixedNoAccess(beg, end - beg);
+}
+
+static void ProtectRange(uptr beg, uptr end) {
+  if (!TryProtectRange(beg, end)) {
+    Printf("FATAL: ThreadSanitizer can not protect [%zx,%zx]\n", beg, end);
+    Printf("FATAL: Make sure you are not using unlimited stack\n");
+    Die();
+  }
+}
+
+void CheckAndProtect() {
+  // Ensure that the binary is indeed compiled with -pie.
+  MemoryMappingLayout proc_maps(true);
+  MemoryMappedSegment segment;
+  while (proc_maps.Next(&segment)) {
+    if (IsAppMem(segment.start)) continue;
+    if (segment.start >= HeapMemEnd() && segment.start < HeapEnd()) continue;
+    if (segment.protection == 0)  // Zero page or mprotected.
+      continue;
+    if (segment.start >= VdsoBeg())  // vdso
+      break;
+    Printf("FATAL: ThreadSanitizer: unexpected memory mapping 0x%zx-0x%zx\n",
+           segment.start, segment.end);
+    Die();
+  }
+
+#    if defined(__aarch64__) && defined(__APPLE__) && SANITIZER_IOS
+  ProtectRange(HeapMemEnd(), ShadowBeg());
+  ProtectRange(ShadowEnd(), MetaShadowBeg());
+  ProtectRange(MetaShadowEnd(), TraceMemBeg());
+#else
+  ProtectRange(LoAppMemEnd(), ShadowBeg());
+  ProtectRange(ShadowEnd(), MetaShadowBeg());
+  if (MidAppMemBeg()) {
+    ProtectRange(MetaShadowEnd(), MidAppMemBeg());
+    ProtectRange(MidAppMemEnd(), TraceMemBeg());
+  } else {
+    ProtectRange(MetaShadowEnd(), TraceMemBeg());
+  }
+  // Memory for traces is mapped lazily in MapThreadTrace.
+  // Protect the whole range for now, so that user does not map something here.
+  ProtectRange(TraceMemBeg(), TraceMemEnd());
+  ProtectRange(TraceMemEnd(), HeapMemBeg());
+  ProtectRange(HeapEnd(), HiAppMemBeg());
+#endif
+
+#if defined(__s390x__)
+  // Protect the rest of the address space.
+  const uptr user_addr_max_l4 = 0x0020000000000000ull;
+  const uptr user_addr_max_l5 = 0xfffffffffffff000ull;
+  // All the maintained s390x kernels support at least 4-level page tables.
+  ProtectRange(HiAppMemEnd(), user_addr_max_l4);
+  // Older s390x kernels may not support 5-level page tables.
+  TryProtectRange(user_addr_max_l4, user_addr_max_l5);
+#endif
+}
+#endif
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_POSIX

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_platform_windows.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_platform_windows.cpp
new file mode 100644
index 0000000000000..fea893768c79f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_platform_windows.cpp
@@ -0,0 +1,36 @@
+//===-- tsan_platform_windows.cpp -----------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Windows-specific code.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_platform.h"
+#if SANITIZER_WINDOWS
+
+#include "tsan_platform.h"
+
+#include <stdlib.h>
+
+namespace __tsan {
+
+void FlushShadowMemory() {
+}
+
+void WriteMemoryProfile(char *buf, uptr buf_size, u64 uptime_ns) {}
+
+void InitializePlatformEarly() {
+}
+
+void InitializePlatform() {
+}
+
+}  // namespace __tsan
+
+#endif  // SANITIZER_WINDOWS

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h b/compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h
new file mode 100644
index 0000000000000..5b43f3ddada3f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_ppc_regs.h
@@ -0,0 +1,96 @@
+#define r0 0
+#define r1 1
+#define r2 2
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+#define r8 8
+#define r9 9
+#define r10 10
+#define r11 11
+#define r12 12
+#define r13 13
+#define r14 14
+#define r15 15
+#define r16 16
+#define r17 17
+#define r18 18
+#define r19 19
+#define r20 20
+#define r21 21
+#define r22 22
+#define r23 23
+#define r24 24
+#define r25 25
+#define r26 26
+#define r27 27
+#define r28 28
+#define r29 29
+#define r30 30
+#define r31 31
+#define f0 0
+#define f1 1
+#define f2 2
+#define f3 3
+#define f4 4
+#define f5 5
+#define f6 6
+#define f7 7
+#define f8 8
+#define f9 9
+#define f10 10
+#define f11 11
+#define f12 12
+#define f13 13
+#define f14 14
+#define f15 15
+#define f16 16
+#define f17 17
+#define f18 18
+#define f19 19
+#define f20 20
+#define f21 21
+#define f22 22
+#define f23 23
+#define f24 24
+#define f25 25
+#define f26 26
+#define f27 27
+#define f28 28
+#define f29 29
+#define f30 30
+#define f31 31
+#define v0 0
+#define v1 1
+#define v2 2
+#define v3 3
+#define v4 4
+#define v5 5
+#define v6 6
+#define v7 7
+#define v8 8
+#define v9 9
+#define v10 10
+#define v11 11
+#define v12 12
+#define v13 13
+#define v14 14
+#define v15 15
+#define v16 16
+#define v17 17
+#define v18 18
+#define v19 19
+#define v20 20
+#define v21 21
+#define v22 22
+#define v23 23
+#define v24 24
+#define v25 25
+#define v26 26
+#define v27 27
+#define v28 28
+#define v29 29
+#define v30 30
+#define v31 31

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_preinit.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_preinit.cpp
new file mode 100644
index 0000000000000..205bdbf93b201
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_preinit.cpp
@@ -0,0 +1,26 @@
+//===-- tsan_preinit.cpp --------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer.
+//
+// Call __tsan_init at the very early stage of process startup.
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "tsan_interface.h"
+
+#if SANITIZER_CAN_USE_PREINIT_ARRAY
+
+// The symbol is called __local_tsan_preinit, because it's not intended to be
+// exported.
+// This code linked into the main executable when -fsanitize=thread is in
+// the link flags. It can only use exported interface functions.
+__attribute__((section(".preinit_array"), used))
+void (*__local_tsan_preinit)(void) = __tsan_init;
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp
new file mode 100644
index 0000000000000..a926c3761ccf9
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_report.cpp
@@ -0,0 +1,479 @@
+//===-- tsan_report.cpp ---------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_report.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "sanitizer_common/sanitizer_file.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_report_decorator.h"
+#include "sanitizer_common/sanitizer_stacktrace_printer.h"
+
+namespace __tsan {
+
+class Decorator: public __sanitizer::SanitizerCommonDecorator {
+ public:
+  Decorator() : SanitizerCommonDecorator() { }
+  const char *Access()     { return Blue(); }
+  const char *ThreadDescription()    { return Cyan(); }
+  const char *Location()   { return Green(); }
+  const char *Sleep()   { return Yellow(); }
+  const char *Mutex()   { return Magenta(); }
+};
+
+ReportDesc::ReportDesc()
+    : tag(kExternalTagNone)
+    , stacks()
+    , mops()
+    , locs()
+    , mutexes()
+    , threads()
+    , unique_tids()
+    , sleep()
+    , count() {
+}
+
+ReportMop::ReportMop()
+    : mset() {
+}
+
+ReportDesc::~ReportDesc() {
+  // FIXME(dvyukov): it must be leaking a lot of memory.
+}
+
+#if !SANITIZER_GO
+
+const int kThreadBufSize = 32;
+const char *thread_name(char *buf, Tid tid) {
+  if (tid == kMainTid)
+    return "main thread";
+  internal_snprintf(buf, kThreadBufSize, "thread T%d", tid);
+  return buf;
+}
+
+static const char *ReportTypeString(ReportType typ, uptr tag) {
+  switch (typ) {
+    case ReportTypeRace:
+      return "data race";
+    case ReportTypeVptrRace:
+      return "data race on vptr (ctor/dtor vs virtual call)";
+    case ReportTypeUseAfterFree:
+      return "heap-use-after-free";
+    case ReportTypeVptrUseAfterFree:
+      return "heap-use-after-free (virtual call vs free)";
+    case ReportTypeExternalRace: {
+      const char *str = GetReportHeaderFromTag(tag);
+      return str ? str : "race on external object";
+    }
+    case ReportTypeThreadLeak:
+      return "thread leak";
+    case ReportTypeMutexDestroyLocked:
+      return "destroy of a locked mutex";
+    case ReportTypeMutexDoubleLock:
+      return "double lock of a mutex";
+    case ReportTypeMutexInvalidAccess:
+      return "use of an invalid mutex (e.g. uninitialized or destroyed)";
+    case ReportTypeMutexBadUnlock:
+      return "unlock of an unlocked mutex (or by a wrong thread)";
+    case ReportTypeMutexBadReadLock:
+      return "read lock of a write locked mutex";
+    case ReportTypeMutexBadReadUnlock:
+      return "read unlock of a write locked mutex";
+    case ReportTypeSignalUnsafe:
+      return "signal-unsafe call inside of a signal";
+    case ReportTypeErrnoInSignal:
+      return "signal handler spoils errno";
+    case ReportTypeDeadlock:
+      return "lock-order-inversion (potential deadlock)";
+    // No default case so compiler warns us if we miss one
+  }
+  UNREACHABLE("missing case");
+}
+
+#if SANITIZER_MAC
+static const char *const kInterposedFunctionPrefix = "wrap_";
+#else
+static const char *const kInterposedFunctionPrefix = "__interceptor_";
+#endif
+
+void PrintStack(const ReportStack *ent) {
+  if (ent == 0 || ent->frames == 0) {
+    Printf("    [failed to restore the stack]\n\n");
+    return;
+  }
+  SymbolizedStack *frame = ent->frames;
+  for (int i = 0; frame && frame->info.address; frame = frame->next, i++) {
+    InternalScopedString res;
+    RenderFrame(&res, common_flags()->stack_trace_format, i,
+                frame->info.address, &frame->info,
+                common_flags()->symbolize_vs_style,
+                common_flags()->strip_path_prefix, kInterposedFunctionPrefix);
+    Printf("%s\n", res.data());
+  }
+  Printf("\n");
+}
+
+static void PrintMutexSet(Vector<ReportMopMutex> const& mset) {
+  for (uptr i = 0; i < mset.Size(); i++) {
+    if (i == 0)
+      Printf(" (mutexes:");
+    const ReportMopMutex m = mset[i];
+    Printf(" %s M%llu", m.write ? "write" : "read", m.id);
+    Printf(i == mset.Size() - 1 ? ")" : ",");
+  }
+}
+
+static const char *MopDesc(bool first, bool write, bool atomic) {
+  return atomic ? (first ? (write ? "Atomic write" : "Atomic read")
+                : (write ? "Previous atomic write" : "Previous atomic read"))
+                : (first ? (write ? "Write" : "Read")
+                : (write ? "Previous write" : "Previous read"));
+}
+
+static const char *ExternalMopDesc(bool first, bool write) {
+  return first ? (write ? "Modifying" : "Read-only")
+               : (write ? "Previous modifying" : "Previous read-only");
+}
+
+static void PrintMop(const ReportMop *mop, bool first) {
+  Decorator d;
+  char thrbuf[kThreadBufSize];
+  Printf("%s", d.Access());
+  if (mop->external_tag == kExternalTagNone) {
+    Printf("  %s of size %d at %p by %s",
+           MopDesc(first, mop->write, mop->atomic), mop->size,
+           (void *)mop->addr, thread_name(thrbuf, mop->tid));
+  } else {
+    const char *object_type = GetObjectTypeFromTag(mop->external_tag);
+    if (object_type == nullptr)
+        object_type = "external object";
+    Printf("  %s access of %s at %p by %s",
+           ExternalMopDesc(first, mop->write), object_type,
+           (void *)mop->addr, thread_name(thrbuf, mop->tid));
+  }
+  PrintMutexSet(mop->mset);
+  Printf(":\n");
+  Printf("%s", d.Default());
+  PrintStack(mop->stack);
+}
+
+static void PrintLocation(const ReportLocation *loc) {
+  Decorator d;
+  char thrbuf[kThreadBufSize];
+  bool print_stack = false;
+  Printf("%s", d.Location());
+  if (loc->type == ReportLocationGlobal) {
+    const DataInfo &global = loc->global;
+    if (global.size != 0)
+      Printf("  Location is global '%s' of size %zu at %p (%s+0x%zx)\n\n",
+             global.name, global.size, reinterpret_cast<void *>(global.start),
+             StripModuleName(global.module), global.module_offset);
+    else
+      Printf("  Location is global '%s' at %p (%s+0x%zx)\n\n", global.name,
+             reinterpret_cast<void *>(global.start),
+             StripModuleName(global.module), global.module_offset);
+  } else if (loc->type == ReportLocationHeap) {
+    char thrbuf[kThreadBufSize];
+    const char *object_type = GetObjectTypeFromTag(loc->external_tag);
+    if (!object_type) {
+      Printf("  Location is heap block of size %zu at %p allocated by %s:\n",
+             loc->heap_chunk_size,
+             reinterpret_cast<void *>(loc->heap_chunk_start),
+             thread_name(thrbuf, loc->tid));
+    } else {
+      Printf("  Location is %s of size %zu at %p allocated by %s:\n",
+             object_type, loc->heap_chunk_size,
+             reinterpret_cast<void *>(loc->heap_chunk_start),
+             thread_name(thrbuf, loc->tid));
+    }
+    print_stack = true;
+  } else if (loc->type == ReportLocationStack) {
+    Printf("  Location is stack of %s.\n\n", thread_name(thrbuf, loc->tid));
+  } else if (loc->type == ReportLocationTLS) {
+    Printf("  Location is TLS of %s.\n\n", thread_name(thrbuf, loc->tid));
+  } else if (loc->type == ReportLocationFD) {
+    Printf("  Location is file descriptor %d created by %s at:\n",
+        loc->fd, thread_name(thrbuf, loc->tid));
+    print_stack = true;
+  }
+  Printf("%s", d.Default());
+  if (print_stack)
+    PrintStack(loc->stack);
+}
+
+static void PrintMutexShort(const ReportMutex *rm, const char *after) {
+  Decorator d;
+  Printf("%sM%lld%s%s", d.Mutex(), rm->id, d.Default(), after);
+}
+
+static void PrintMutexShortWithAddress(const ReportMutex *rm,
+                                       const char *after) {
+  Decorator d;
+  Printf("%sM%lld (%p)%s%s", d.Mutex(), rm->id,
+         reinterpret_cast<void *>(rm->addr), d.Default(), after);
+}
+
+static void PrintMutex(const ReportMutex *rm) {
+  Decorator d;
+  if (rm->destroyed) {
+    Printf("%s", d.Mutex());
+    Printf("  Mutex M%llu is already destroyed.\n\n", rm->id);
+    Printf("%s", d.Default());
+  } else {
+    Printf("%s", d.Mutex());
+    Printf("  Mutex M%llu (%p) created at:\n", rm->id,
+           reinterpret_cast<void *>(rm->addr));
+    Printf("%s", d.Default());
+    PrintStack(rm->stack);
+  }
+}
+
+static void PrintThread(const ReportThread *rt) {
+  Decorator d;
+  if (rt->id == kMainTid)  // Little sense in describing the main thread.
+    return;
+  Printf("%s", d.ThreadDescription());
+  Printf("  Thread T%d", rt->id);
+  if (rt->name && rt->name[0] != '\0')
+    Printf(" '%s'", rt->name);
+  char thrbuf[kThreadBufSize];
+  const char *thread_status = rt->running ? "running" : "finished";
+  if (rt->thread_type == ThreadType::Worker) {
+    Printf(" (tid=%llu, %s) is a GCD worker thread\n", rt->os_id,
+           thread_status);
+    Printf("\n");
+    Printf("%s", d.Default());
+    return;
+  }
+  Printf(" (tid=%llu, %s) created by %s", rt->os_id, thread_status,
+         thread_name(thrbuf, rt->parent_tid));
+  if (rt->stack)
+    Printf(" at:");
+  Printf("\n");
+  Printf("%s", d.Default());
+  PrintStack(rt->stack);
+}
+
+static void PrintSleep(const ReportStack *s) {
+  Decorator d;
+  Printf("%s", d.Sleep());
+  Printf("  As if synchronized via sleep:\n");
+  Printf("%s", d.Default());
+  PrintStack(s);
+}
+
+static ReportStack *ChooseSummaryStack(const ReportDesc *rep) {
+  if (rep->mops.Size())
+    return rep->mops[0]->stack;
+  if (rep->stacks.Size())
+    return rep->stacks[0];
+  if (rep->mutexes.Size())
+    return rep->mutexes[0]->stack;
+  if (rep->threads.Size())
+    return rep->threads[0]->stack;
+  return 0;
+}
+
+static bool FrameIsInternal(const SymbolizedStack *frame) {
+  if (frame == 0)
+    return false;
+  const char *file = frame->info.file;
+  const char *module = frame->info.module;
+  if (file != 0 &&
+      (internal_strstr(file, "tsan_interceptors_posix.cpp") ||
+       internal_strstr(file, "sanitizer_common_interceptors.inc") ||
+       internal_strstr(file, "tsan_interface_")))
+    return true;
+  if (module != 0 && (internal_strstr(module, "libclang_rt.tsan_")))
+    return true;
+  return false;
+}
+
+static SymbolizedStack *SkipTsanInternalFrames(SymbolizedStack *frames) {
+  while (FrameIsInternal(frames) && frames->next)
+    frames = frames->next;
+  return frames;
+}
+
+void PrintReport(const ReportDesc *rep) {
+  Decorator d;
+  Printf("==================\n");
+  const char *rep_typ_str = ReportTypeString(rep->typ, rep->tag);
+  Printf("%s", d.Warning());
+  Printf("WARNING: ThreadSanitizer: %s (pid=%d)\n", rep_typ_str,
+         (int)internal_getpid());
+  Printf("%s", d.Default());
+
+  if (rep->typ == ReportTypeDeadlock) {
+    char thrbuf[kThreadBufSize];
+    Printf("  Cycle in lock order graph: ");
+    for (uptr i = 0; i < rep->mutexes.Size(); i++)
+      PrintMutexShortWithAddress(rep->mutexes[i], " => ");
+    PrintMutexShort(rep->mutexes[0], "\n\n");
+    CHECK_GT(rep->mutexes.Size(), 0U);
+    CHECK_EQ(rep->mutexes.Size() * (flags()->second_deadlock_stack ? 2 : 1),
+             rep->stacks.Size());
+    for (uptr i = 0; i < rep->mutexes.Size(); i++) {
+      Printf("  Mutex ");
+      PrintMutexShort(rep->mutexes[(i + 1) % rep->mutexes.Size()],
+                      " acquired here while holding mutex ");
+      PrintMutexShort(rep->mutexes[i], " in ");
+      Printf("%s", d.ThreadDescription());
+      Printf("%s:\n", thread_name(thrbuf, rep->unique_tids[i]));
+      Printf("%s", d.Default());
+      if (flags()->second_deadlock_stack) {
+        PrintStack(rep->stacks[2*i]);
+        Printf("  Mutex ");
+        PrintMutexShort(rep->mutexes[i],
+                        " previously acquired by the same thread here:\n");
+        PrintStack(rep->stacks[2*i+1]);
+      } else {
+        PrintStack(rep->stacks[i]);
+        if (i == 0)
+          Printf("    Hint: use TSAN_OPTIONS=second_deadlock_stack=1 "
+                 "to get more informative warning message\n\n");
+      }
+    }
+  } else {
+    for (uptr i = 0; i < rep->stacks.Size(); i++) {
+      if (i)
+        Printf("  and:\n");
+      PrintStack(rep->stacks[i]);
+    }
+  }
+
+  for (uptr i = 0; i < rep->mops.Size(); i++)
+    PrintMop(rep->mops[i], i == 0);
+
+  if (rep->sleep)
+    PrintSleep(rep->sleep);
+
+  for (uptr i = 0; i < rep->locs.Size(); i++)
+    PrintLocation(rep->locs[i]);
+
+  if (rep->typ != ReportTypeDeadlock) {
+    for (uptr i = 0; i < rep->mutexes.Size(); i++)
+      PrintMutex(rep->mutexes[i]);
+  }
+
+  for (uptr i = 0; i < rep->threads.Size(); i++)
+    PrintThread(rep->threads[i]);
+
+  if (rep->typ == ReportTypeThreadLeak && rep->count > 1)
+    Printf("  And %d more similar thread leaks.\n\n", rep->count - 1);
+
+  if (ReportStack *stack = ChooseSummaryStack(rep)) {
+    if (SymbolizedStack *frame = SkipTsanInternalFrames(stack->frames))
+      ReportErrorSummary(rep_typ_str, frame->info);
+  }
+
+  if (common_flags()->print_module_map == 2)
+    DumpProcessMap();
+
+  Printf("==================\n");
+}
+
+#else  // #if !SANITIZER_GO
+
+const Tid kMainGoroutineId = 1;
+
+void PrintStack(const ReportStack *ent) {
+  if (ent == 0 || ent->frames == 0) {
+    Printf("  [failed to restore the stack]\n");
+    return;
+  }
+  SymbolizedStack *frame = ent->frames;
+  for (int i = 0; frame; frame = frame->next, i++) {
+    const AddressInfo &info = frame->info;
+    Printf("  %s()\n      %s:%d +0x%zx\n", info.function,
+           StripPathPrefix(info.file, common_flags()->strip_path_prefix),
+           info.line, info.module_offset);
+  }
+}
+
+static void PrintMop(const ReportMop *mop, bool first) {
+  Printf("\n");
+  Printf("%s at %p by ",
+         (first ? (mop->write ? "Write" : "Read")
+                : (mop->write ? "Previous write" : "Previous read")),
+         reinterpret_cast<void *>(mop->addr));
+  if (mop->tid == kMainGoroutineId)
+    Printf("main goroutine:\n");
+  else
+    Printf("goroutine %d:\n", mop->tid);
+  PrintStack(mop->stack);
+}
+
+static void PrintLocation(const ReportLocation *loc) {
+  switch (loc->type) {
+  case ReportLocationHeap: {
+    Printf("\n");
+    Printf("Heap block of size %zu at %p allocated by ", loc->heap_chunk_size,
+           reinterpret_cast<void *>(loc->heap_chunk_start));
+    if (loc->tid == kMainGoroutineId)
+      Printf("main goroutine:\n");
+    else
+      Printf("goroutine %d:\n", loc->tid);
+    PrintStack(loc->stack);
+    break;
+  }
+  case ReportLocationGlobal: {
+    Printf("\n");
+    Printf("Global var %s of size %zu at %p declared at %s:%zu\n",
+           loc->global.name, loc->global.size,
+           reinterpret_cast<void *>(loc->global.start), loc->global.file,
+           loc->global.line);
+    break;
+  }
+  default:
+    break;
+  }
+}
+
+static void PrintThread(const ReportThread *rt) {
+  if (rt->id == kMainGoroutineId)
+    return;
+  Printf("\n");
+  Printf("Goroutine %d (%s) created at:\n",
+    rt->id, rt->running ? "running" : "finished");
+  PrintStack(rt->stack);
+}
+
+void PrintReport(const ReportDesc *rep) {
+  Printf("==================\n");
+  if (rep->typ == ReportTypeRace) {
+    Printf("WARNING: DATA RACE");
+    for (uptr i = 0; i < rep->mops.Size(); i++)
+      PrintMop(rep->mops[i], i == 0);
+    for (uptr i = 0; i < rep->locs.Size(); i++)
+      PrintLocation(rep->locs[i]);
+    for (uptr i = 0; i < rep->threads.Size(); i++)
+      PrintThread(rep->threads[i]);
+  } else if (rep->typ == ReportTypeDeadlock) {
+    Printf("WARNING: DEADLOCK\n");
+    for (uptr i = 0; i < rep->mutexes.Size(); i++) {
+      Printf("Goroutine %d lock mutex %llu while holding mutex %llu:\n", 999,
+             rep->mutexes[i]->id,
+             rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
+      PrintStack(rep->stacks[2*i]);
+      Printf("\n");
+      Printf("Mutex %llu was previously locked here:\n",
+             rep->mutexes[(i + 1) % rep->mutexes.Size()]->id);
+      PrintStack(rep->stacks[2*i + 1]);
+      Printf("\n");
+    }
+  }
+  Printf("==================\n");
+}
+
+#endif
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_report.h b/compiler-rt/lib/tsan/rtl-old/tsan_report.h
new file mode 100644
index 0000000000000..d68c2db88828f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_report.h
@@ -0,0 +1,127 @@
+//===-- tsan_report.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_REPORT_H
+#define TSAN_REPORT_H
+
+#include "sanitizer_common/sanitizer_symbolizer.h"
+#include "sanitizer_common/sanitizer_thread_registry.h"
+#include "sanitizer_common/sanitizer_vector.h"
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+enum ReportType {
+  ReportTypeRace,
+  ReportTypeVptrRace,
+  ReportTypeUseAfterFree,
+  ReportTypeVptrUseAfterFree,
+  ReportTypeExternalRace,
+  ReportTypeThreadLeak,
+  ReportTypeMutexDestroyLocked,
+  ReportTypeMutexDoubleLock,
+  ReportTypeMutexInvalidAccess,
+  ReportTypeMutexBadUnlock,
+  ReportTypeMutexBadReadLock,
+  ReportTypeMutexBadReadUnlock,
+  ReportTypeSignalUnsafe,
+  ReportTypeErrnoInSignal,
+  ReportTypeDeadlock
+};
+
+struct ReportStack {
+  SymbolizedStack *frames = nullptr;
+  bool suppressable = false;
+};
+
+struct ReportMopMutex {
+  u64 id;
+  bool write;
+};
+
+struct ReportMop {
+  int tid;
+  uptr addr;
+  int size;
+  bool write;
+  bool atomic;
+  uptr external_tag;
+  Vector<ReportMopMutex> mset;
+  ReportStack *stack;
+
+  ReportMop();
+};
+
+enum ReportLocationType {
+  ReportLocationGlobal,
+  ReportLocationHeap,
+  ReportLocationStack,
+  ReportLocationTLS,
+  ReportLocationFD
+};
+
+struct ReportLocation {
+  ReportLocationType type = ReportLocationGlobal;
+  DataInfo global = {};
+  uptr heap_chunk_start = 0;
+  uptr heap_chunk_size = 0;
+  uptr external_tag = 0;
+  Tid tid = kInvalidTid;
+  int fd = 0;
+  bool suppressable = false;
+  ReportStack *stack = nullptr;
+};
+
+struct ReportThread {
+  Tid id;
+  tid_t os_id;
+  bool running;
+  ThreadType thread_type;
+  char *name;
+  Tid parent_tid;
+  ReportStack *stack;
+};
+
+struct ReportMutex {
+  u64 id;
+  uptr addr;
+  bool destroyed;
+  ReportStack *stack;
+};
+
+class ReportDesc {
+ public:
+  ReportType typ;
+  uptr tag;
+  Vector<ReportStack*> stacks;
+  Vector<ReportMop*> mops;
+  Vector<ReportLocation*> locs;
+  Vector<ReportMutex*> mutexes;
+  Vector<ReportThread*> threads;
+  Vector<Tid> unique_tids;
+  ReportStack *sleep;
+  int count;
+
+  ReportDesc();
+  ~ReportDesc();
+
+ private:
+  ReportDesc(const ReportDesc&);
+  void operator = (const ReportDesc&);
+};
+
+// Format and output the report to the console/log. No additional logic.
+void PrintReport(const ReportDesc *rep);
+void PrintStack(const ReportStack *stack);
+
+}  // namespace __tsan
+
+#endif  // TSAN_REPORT_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp
new file mode 100644
index 0000000000000..c14af9788e32d
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.cpp
@@ -0,0 +1,811 @@
+//===-- tsan_rtl.cpp ------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Main file (entry points) for the TSan run-time.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_rtl.h"
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_file.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+#include "tsan_defs.h"
+#include "tsan_interface.h"
+#include "tsan_mman.h"
+#include "tsan_platform.h"
+#include "tsan_suppressions.h"
+#include "tsan_symbolize.h"
+#include "ubsan/ubsan_init.h"
+
+volatile int __tsan_resumed = 0;
+
+extern "C" void __tsan_resume() {
+  __tsan_resumed = 1;
+}
+
+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_test_only_on_fork() {}
+
+namespace __tsan {
+
+#if !SANITIZER_GO
+void (*on_initialize)(void);
+int (*on_finalize)(int);
+#endif
+
+#if !SANITIZER_GO && !SANITIZER_MAC
+__attribute__((tls_model("initial-exec")))
+THREADLOCAL char cur_thread_placeholder[sizeof(ThreadState)] ALIGNED(
+    SANITIZER_CACHE_LINE_SIZE);
+#endif
+static char ctx_placeholder[sizeof(Context)] ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+Context *ctx;
+
+// Can be overriden by a front-end.
+#ifdef TSAN_EXTERNAL_HOOKS
+bool OnFinalize(bool failed);
+void OnInitialize();
+#else
+#include <dlfcn.h>
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+bool OnFinalize(bool failed) {
+#if !SANITIZER_GO
+  if (on_finalize)
+    return on_finalize(failed);
+#endif
+  return failed;
+}
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+void OnInitialize() {
+#if !SANITIZER_GO
+  if (on_initialize)
+    on_initialize();
+#endif
+}
+#endif
+
+static ThreadContextBase *CreateThreadContext(Tid tid) {
+  // Map thread trace when context is created.
+  char name[50];
+  internal_snprintf(name, sizeof(name), "trace %u", tid);
+  MapThreadTrace(GetThreadTrace(tid), TraceSize() * sizeof(Event), name);
+  const uptr hdr = GetThreadTraceHeader(tid);
+  internal_snprintf(name, sizeof(name), "trace header %u", tid);
+  MapThreadTrace(hdr, sizeof(Trace), name);
+  new((void*)hdr) Trace();
+  // We are going to use only a small part of the trace with the default
+  // value of history_size. However, the constructor writes to the whole trace.
+  // Release the unused part.
+  uptr hdr_end = hdr + sizeof(Trace);
+  hdr_end -= sizeof(TraceHeader) * (kTraceParts - TraceParts());
+  hdr_end = RoundUp(hdr_end, GetPageSizeCached());
+  if (hdr_end < hdr + sizeof(Trace)) {
+    ReleaseMemoryPagesToOS(hdr_end, hdr + sizeof(Trace));
+    uptr unused = hdr + sizeof(Trace) - hdr_end;
+    if (hdr_end != (uptr)MmapFixedNoAccess(hdr_end, unused)) {
+      Report("ThreadSanitizer: failed to mprotect [0x%zx-0x%zx) \n", hdr_end,
+             unused);
+      CHECK("unable to mprotect" && 0);
+    }
+  }
+  return New<ThreadContext>(tid);
+}
+
+#if !SANITIZER_GO
+static const u32 kThreadQuarantineSize = 16;
+#else
+static const u32 kThreadQuarantineSize = 64;
+#endif
+
+Context::Context()
+    : initialized(),
+      report_mtx(MutexTypeReport),
+      nreported(),
+      thread_registry(CreateThreadContext, kMaxTid, kThreadQuarantineSize,
+                      kMaxTidReuse),
+      racy_mtx(MutexTypeRacy),
+      racy_stacks(),
+      racy_addresses(),
+      fired_suppressions_mtx(MutexTypeFired),
+      clock_alloc(LINKER_INITIALIZED, "clock allocator") {
+  fired_suppressions.reserve(8);
+}
+
+// The objects are allocated in TLS, so one may rely on zero-initialization.
+ThreadState::ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
+                         unsigned reuse_count, uptr stk_addr, uptr stk_size,
+                         uptr tls_addr, uptr tls_size)
+    : fast_state(tid, epoch)
+      // Do not touch these, rely on zero initialization,
+      // they may be accessed before the ctor.
+      // , ignore_reads_and_writes()
+      // , ignore_interceptors()
+      ,
+      clock(tid, reuse_count)
+#if !SANITIZER_GO
+      ,
+      jmp_bufs()
+#endif
+      ,
+      tid(tid),
+      unique_id(unique_id),
+      stk_addr(stk_addr),
+      stk_size(stk_size),
+      tls_addr(tls_addr),
+      tls_size(tls_size)
+#if !SANITIZER_GO
+      ,
+      last_sleep_clock(tid)
+#endif
+{
+  CHECK_EQ(reinterpret_cast<uptr>(this) % SANITIZER_CACHE_LINE_SIZE, 0);
+#if !SANITIZER_GO
+  // C/C++ uses fixed size shadow stack.
+  const int kInitStackSize = kShadowStackSize;
+  shadow_stack = static_cast<uptr *>(
+      MmapNoReserveOrDie(kInitStackSize * sizeof(uptr), "shadow stack"));
+  SetShadowRegionHugePageMode(reinterpret_cast<uptr>(shadow_stack),
+                              kInitStackSize * sizeof(uptr));
+#else
+  // Go uses malloc-allocated shadow stack with dynamic size.
+  const int kInitStackSize = 8;
+  shadow_stack = static_cast<uptr *>(Alloc(kInitStackSize * sizeof(uptr)));
+#endif
+  shadow_stack_pos = shadow_stack;
+  shadow_stack_end = shadow_stack + kInitStackSize;
+}
+
+#if !SANITIZER_GO
+void MemoryProfiler(u64 uptime) {
+  if (ctx->memprof_fd == kInvalidFd)
+    return;
+  InternalMmapVector<char> buf(4096);
+  WriteMemoryProfile(buf.data(), buf.size(), uptime);
+  WriteToFile(ctx->memprof_fd, buf.data(), internal_strlen(buf.data()));
+}
+
+void InitializeMemoryProfiler() {
+  ctx->memprof_fd = kInvalidFd;
+  const char *fname = flags()->profile_memory;
+  if (!fname || !fname[0])
+    return;
+  if (internal_strcmp(fname, "stdout") == 0) {
+    ctx->memprof_fd = 1;
+  } else if (internal_strcmp(fname, "stderr") == 0) {
+    ctx->memprof_fd = 2;
+  } else {
+    InternalScopedString filename;
+    filename.append("%s.%d", fname, (int)internal_getpid());
+    ctx->memprof_fd = OpenFile(filename.data(), WrOnly);
+    if (ctx->memprof_fd == kInvalidFd) {
+      Printf("ThreadSanitizer: failed to open memory profile file '%s'\n",
+             filename.data());
+      return;
+    }
+  }
+  MemoryProfiler(0);
+  MaybeSpawnBackgroundThread();
+}
+
+static void *BackgroundThread(void *arg) {
+  // This is a non-initialized non-user thread, nothing to see here.
+  // We don't use ScopedIgnoreInterceptors, because we want ignores to be
+  // enabled even when the thread function exits (e.g. during pthread thread
+  // shutdown code).
+  cur_thread_init()->ignore_interceptors++;
+  const u64 kMs2Ns = 1000 * 1000;
+  const u64 start = NanoTime();
+
+  u64 last_flush = NanoTime();
+  uptr last_rss = 0;
+  for (int i = 0;
+      atomic_load(&ctx->stop_background_thread, memory_order_relaxed) == 0;
+      i++) {
+    SleepForMillis(100);
+    u64 now = NanoTime();
+
+    // Flush memory if requested.
+    if (flags()->flush_memory_ms > 0) {
+      if (last_flush + flags()->flush_memory_ms * kMs2Ns < now) {
+        VPrintf(1, "ThreadSanitizer: periodic memory flush\n");
+        FlushShadowMemory();
+        last_flush = NanoTime();
+      }
+    }
+    if (flags()->memory_limit_mb > 0) {
+      uptr rss = GetRSS();
+      uptr limit = uptr(flags()->memory_limit_mb) << 20;
+      VPrintf(1, "ThreadSanitizer: memory flush check"
+                 " RSS=%llu LAST=%llu LIMIT=%llu\n",
+              (u64)rss >> 20, (u64)last_rss >> 20, (u64)limit >> 20);
+      if (2 * rss > limit + last_rss) {
+        VPrintf(1, "ThreadSanitizer: flushing memory due to RSS\n");
+        FlushShadowMemory();
+        rss = GetRSS();
+        VPrintf(1, "ThreadSanitizer: memory flushed RSS=%llu\n", (u64)rss>>20);
+      }
+      last_rss = rss;
+    }
+
+    MemoryProfiler(now - start);
+
+    // Flush symbolizer cache if requested.
+    if (flags()->flush_symbolizer_ms > 0) {
+      u64 last = atomic_load(&ctx->last_symbolize_time_ns,
+                             memory_order_relaxed);
+      if (last != 0 && last + flags()->flush_symbolizer_ms * kMs2Ns < now) {
+        Lock l(&ctx->report_mtx);
+        ScopedErrorReportLock l2;
+        SymbolizeFlush();
+        atomic_store(&ctx->last_symbolize_time_ns, 0, memory_order_relaxed);
+      }
+    }
+  }
+  return nullptr;
+}
+
+static void StartBackgroundThread() {
+  ctx->background_thread = internal_start_thread(&BackgroundThread, 0);
+}
+
+#ifndef __mips__
+static void StopBackgroundThread() {
+  atomic_store(&ctx->stop_background_thread, 1, memory_order_relaxed);
+  internal_join_thread(ctx->background_thread);
+  ctx->background_thread = 0;
+}
+#endif
+#endif
+
+void DontNeedShadowFor(uptr addr, uptr size) {
+  ReleaseMemoryPagesToOS(reinterpret_cast<uptr>(MemToShadow(addr)),
+                         reinterpret_cast<uptr>(MemToShadow(addr + size)));
+}
+
+#if !SANITIZER_GO
+// We call UnmapShadow before the actual munmap, at that point we don't yet
+// know if the provided address/size are sane. We can't call UnmapShadow
+// after the actual munmap becuase at that point the memory range can
+// already be reused for something else, so we can't rely on the munmap
+// return value to understand is the values are sane.
+// While calling munmap with insane values (non-canonical address, negative
+// size, etc) is an error, the kernel won't crash. We must also try to not
+// crash as the failure mode is very confusing (paging fault inside of the
+// runtime on some derived shadow address).
+static bool IsValidMmapRange(uptr addr, uptr size) {
+  if (size == 0)
+    return true;
+  if (static_cast<sptr>(size) < 0)
+    return false;
+  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+    return false;
+  // Check that if the start of the region belongs to one of app ranges,
+  // end of the region belongs to the same region.
+  const uptr ranges[][2] = {
+      {LoAppMemBeg(), LoAppMemEnd()},
+      {MidAppMemBeg(), MidAppMemEnd()},
+      {HiAppMemBeg(), HiAppMemEnd()},
+  };
+  for (auto range : ranges) {
+    if (addr >= range[0] && addr < range[1])
+      return addr + size <= range[1];
+  }
+  return false;
+}
+
+void UnmapShadow(ThreadState *thr, uptr addr, uptr size) {
+  if (size == 0 || !IsValidMmapRange(addr, size))
+    return;
+  DontNeedShadowFor(addr, size);
+  ScopedGlobalProcessor sgp;
+  ctx->metamap.ResetRange(thr->proc(), addr, size);
+}
+#endif
+
+void MapShadow(uptr addr, uptr size) {
+  // Global data is not 64K aligned, but there are no adjacent mappings,
+  // so we can get away with unaligned mapping.
+  // CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
+  const uptr kPageSize = GetPageSizeCached();
+  uptr shadow_begin = RoundDownTo((uptr)MemToShadow(addr), kPageSize);
+  uptr shadow_end = RoundUpTo((uptr)MemToShadow(addr + size), kPageSize);
+  if (!MmapFixedSuperNoReserve(shadow_begin, shadow_end - shadow_begin,
+                               "shadow"))
+    Die();
+
+  // Meta shadow is 2:1, so tread carefully.
+  static bool data_mapped = false;
+  static uptr mapped_meta_end = 0;
+  uptr meta_begin = (uptr)MemToMeta(addr);
+  uptr meta_end = (uptr)MemToMeta(addr + size);
+  meta_begin = RoundDownTo(meta_begin, 64 << 10);
+  meta_end = RoundUpTo(meta_end, 64 << 10);
+  if (!data_mapped) {
+    // First call maps data+bss.
+    data_mapped = true;
+    if (!MmapFixedSuperNoReserve(meta_begin, meta_end - meta_begin,
+                                 "meta shadow"))
+      Die();
+  } else {
+    // Mapping continuous heap.
+    // Windows wants 64K alignment.
+    meta_begin = RoundDownTo(meta_begin, 64 << 10);
+    meta_end = RoundUpTo(meta_end, 64 << 10);
+    if (meta_end <= mapped_meta_end)
+      return;
+    if (meta_begin < mapped_meta_end)
+      meta_begin = mapped_meta_end;
+    if (!MmapFixedSuperNoReserve(meta_begin, meta_end - meta_begin,
+                                 "meta shadow"))
+      Die();
+    mapped_meta_end = meta_end;
+  }
+  VPrintf(2, "mapped meta shadow for (0x%zx-0x%zx) at (0x%zx-0x%zx)\n", addr,
+          addr + size, meta_begin, meta_end);
+}
+
+void MapThreadTrace(uptr addr, uptr size, const char *name) {
+  DPrintf("#0: Mapping trace at 0x%zx-0x%zx(0x%zx)\n", addr, addr + size, size);
+  CHECK_GE(addr, TraceMemBeg());
+  CHECK_LE(addr + size, TraceMemEnd());
+  CHECK_EQ(addr, addr & ~((64 << 10) - 1));  // windows wants 64K alignment
+  if (!MmapFixedSuperNoReserve(addr, size, name)) {
+    Printf("FATAL: ThreadSanitizer can not mmap thread trace (0x%zx/0x%zx)\n",
+           addr, size);
+    Die();
+  }
+}
+
+#if !SANITIZER_GO
+static void OnStackUnwind(const SignalContext &sig, const void *,
+                          BufferedStackTrace *stack) {
+  stack->Unwind(StackTrace::GetNextInstructionPc(sig.pc), sig.bp, sig.context,
+                common_flags()->fast_unwind_on_fatal);
+}
+
+static void TsanOnDeadlySignal(int signo, void *siginfo, void *context) {
+  HandleDeadlySignal(siginfo, context, GetTid(), &OnStackUnwind, nullptr);
+}
+#endif
+
+void CheckUnwind() {
+  // There is high probability that interceptors will check-fail as well,
+  // on the other hand there is no sense in processing interceptors
+  // since we are going to die soon.
+  ScopedIgnoreInterceptors ignore;
+#if !SANITIZER_GO
+  cur_thread()->ignore_sync++;
+  cur_thread()->ignore_reads_and_writes++;
+#endif
+  PrintCurrentStackSlow(StackTrace::GetCurrentPc());
+}
+
+bool is_initialized;
+
+void Initialize(ThreadState *thr) {
+  // Thread safe because done before all threads exist.
+  if (is_initialized)
+    return;
+  is_initialized = true;
+  // We are not ready to handle interceptors yet.
+  ScopedIgnoreInterceptors ignore;
+  SanitizerToolName = "ThreadSanitizer";
+  // Install tool-specific callbacks in sanitizer_common.
+  SetCheckUnwindCallback(CheckUnwind);
+
+  ctx = new(ctx_placeholder) Context;
+  const char *env_name = SANITIZER_GO ? "GORACE" : "TSAN_OPTIONS";
+  const char *options = GetEnv(env_name);
+  CacheBinaryName();
+  CheckASLR();
+  InitializeFlags(&ctx->flags, options, env_name);
+  AvoidCVE_2016_2143();
+  __sanitizer::InitializePlatformEarly();
+  __tsan::InitializePlatformEarly();
+
+#if !SANITIZER_GO
+  // Re-exec ourselves if we need to set additional env or command line args.
+  MaybeReexec();
+
+  InitializeAllocator();
+  ReplaceSystemMalloc();
+#endif
+  if (common_flags()->detect_deadlocks)
+    ctx->dd = DDetector::Create(flags());
+  Processor *proc = ProcCreate();
+  ProcWire(proc, thr);
+  InitializeInterceptors();
+  InitializePlatform();
+  InitializeDynamicAnnotations();
+#if !SANITIZER_GO
+  InitializeShadowMemory();
+  InitializeAllocatorLate();
+  InstallDeadlySignalHandlers(TsanOnDeadlySignal);
+#endif
+  // Setup correct file descriptor for error reports.
+  __sanitizer_set_report_path(common_flags()->log_path);
+  InitializeSuppressions();
+#if !SANITIZER_GO
+  InitializeLibIgnore();
+  Symbolizer::GetOrInit()->AddHooks(EnterSymbolizer, ExitSymbolizer);
+#endif
+
+  VPrintf(1, "***** Running under ThreadSanitizer v2 (pid %d) *****\n",
+          (int)internal_getpid());
+
+  // Initialize thread 0.
+  Tid tid = ThreadCreate(thr, 0, 0, true);
+  CHECK_EQ(tid, kMainTid);
+  ThreadStart(thr, tid, GetTid(), ThreadType::Regular);
+#if TSAN_CONTAINS_UBSAN
+  __ubsan::InitAsPlugin();
+#endif
+  ctx->initialized = true;
+
+#if !SANITIZER_GO
+  Symbolizer::LateInitialize();
+  InitializeMemoryProfiler();
+#endif
+
+  if (flags()->stop_on_start) {
+    Printf("ThreadSanitizer is suspended at startup (pid %d)."
+           " Call __tsan_resume().\n",
+           (int)internal_getpid());
+    while (__tsan_resumed == 0) {}
+  }
+
+  OnInitialize();
+}
+
+void MaybeSpawnBackgroundThread() {
+  // On MIPS, TSan initialization is run before
+  // __pthread_initialize_minimal_internal() is finished, so we can not spawn
+  // new threads.
+#if !SANITIZER_GO && !defined(__mips__)
+  static atomic_uint32_t bg_thread = {};
+  if (atomic_load(&bg_thread, memory_order_relaxed) == 0 &&
+      atomic_exchange(&bg_thread, 1, memory_order_relaxed) == 0) {
+    StartBackgroundThread();
+    SetSandboxingCallback(StopBackgroundThread);
+  }
+#endif
+}
+
+
+int Finalize(ThreadState *thr) {
+  bool failed = false;
+
+  if (common_flags()->print_module_map == 1)
+    DumpProcessMap();
+
+  if (flags()->atexit_sleep_ms > 0 && ThreadCount(thr) > 1)
+    SleepForMillis(flags()->atexit_sleep_ms);
+
+  // Wait for pending reports.
+  ctx->report_mtx.Lock();
+  { ScopedErrorReportLock l; }
+  ctx->report_mtx.Unlock();
+
+#if !SANITIZER_GO
+  if (Verbosity()) AllocatorPrintStats();
+#endif
+
+  ThreadFinalize(thr);
+
+  if (ctx->nreported) {
+    failed = true;
+#if !SANITIZER_GO
+    Printf("ThreadSanitizer: reported %d warnings\n", ctx->nreported);
+#else
+    Printf("Found %d data race(s)\n", ctx->nreported);
+#endif
+  }
+
+  if (common_flags()->print_suppressions)
+    PrintMatchedSuppressions();
+
+  failed = OnFinalize(failed);
+
+  return failed ? common_flags()->exitcode : 0;
+}
+
+#if !SANITIZER_GO
+void ForkBefore(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+  ctx->thread_registry.Lock();
+  ctx->report_mtx.Lock();
+  ScopedErrorReportLock::Lock();
+  AllocatorLock();
+  // Suppress all reports in the pthread_atfork callbacks.
+  // Reports will deadlock on the report_mtx.
+  // We could ignore sync operations as well,
+  // but so far it's unclear if it will do more good or harm.
+  // Unnecessarily ignoring things can lead to false positives later.
+  thr->suppress_reports++;
+  // On OS X, REAL(fork) can call intercepted functions (OSSpinLockLock), and
+  // we'll assert in CheckNoLocks() unless we ignore interceptors.
+  // On OS X libSystem_atfork_prepare/parent/child callbacks are called
+  // after/before our callbacks and they call free.
+  thr->ignore_interceptors++;
+  // Disables memory write in OnUserAlloc/Free.
+  thr->ignore_reads_and_writes++;
+
+  __tsan_test_only_on_fork();
+}
+
+void ForkParentAfter(ThreadState *thr, uptr pc) NO_THREAD_SAFETY_ANALYSIS {
+  thr->suppress_reports--;  // Enabled in ForkBefore.
+  thr->ignore_interceptors--;
+  thr->ignore_reads_and_writes--;
+  AllocatorUnlock();
+  ScopedErrorReportLock::Unlock();
+  ctx->report_mtx.Unlock();
+  ctx->thread_registry.Unlock();
+}
+
+void ForkChildAfter(ThreadState *thr, uptr pc,
+                    bool start_thread) NO_THREAD_SAFETY_ANALYSIS {
+  thr->suppress_reports--;  // Enabled in ForkBefore.
+  thr->ignore_interceptors--;
+  thr->ignore_reads_and_writes--;
+  AllocatorUnlock();
+  ScopedErrorReportLock::Unlock();
+  ctx->report_mtx.Unlock();
+  ctx->thread_registry.Unlock();
+
+  uptr nthread = 0;
+  ctx->thread_registry.GetNumberOfThreads(0, 0, &nthread /* alive threads */);
+  VPrintf(1, "ThreadSanitizer: forked new process with pid %d,"
+      " parent had %d threads\n", (int)internal_getpid(), (int)nthread);
+  if (nthread == 1) {
+    if (start_thread)
+      StartBackgroundThread();
+  } else {
+    // We've just forked a multi-threaded process. We cannot reasonably function
+    // after that (some mutexes may be locked before fork). So just enable
+    // ignores for everything in the hope that we will exec soon.
+    ctx->after_multithreaded_fork = true;
+    thr->ignore_interceptors++;
+    ThreadIgnoreBegin(thr, pc);
+    ThreadIgnoreSyncBegin(thr, pc);
+  }
+}
+#endif
+
+#if SANITIZER_GO
+NOINLINE
+void GrowShadowStack(ThreadState *thr) {
+  const int sz = thr->shadow_stack_end - thr->shadow_stack;
+  const int newsz = 2 * sz;
+  auto *newstack = (uptr *)Alloc(newsz * sizeof(uptr));
+  internal_memcpy(newstack, thr->shadow_stack, sz * sizeof(uptr));
+  Free(thr->shadow_stack);
+  thr->shadow_stack = newstack;
+  thr->shadow_stack_pos = newstack + sz;
+  thr->shadow_stack_end = newstack + newsz;
+}
+#endif
+
+StackID CurrentStackId(ThreadState *thr, uptr pc) {
+  if (!thr->is_inited)  // May happen during bootstrap.
+    return kInvalidStackID;
+  if (pc != 0) {
+#if !SANITIZER_GO
+    DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#else
+    if (thr->shadow_stack_pos == thr->shadow_stack_end)
+      GrowShadowStack(thr);
+#endif
+    thr->shadow_stack_pos[0] = pc;
+    thr->shadow_stack_pos++;
+  }
+  StackID id = StackDepotPut(
+      StackTrace(thr->shadow_stack, thr->shadow_stack_pos - thr->shadow_stack));
+  if (pc != 0)
+    thr->shadow_stack_pos--;
+  return id;
+}
+
+namespace v3 {
+
+NOINLINE
+void TraceSwitchPart(ThreadState *thr) {
+  Trace *trace = &thr->tctx->trace;
+  Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
+  DCHECK_EQ(reinterpret_cast<uptr>(pos + 1) & TracePart::kAlignment, 0);
+  auto *part = trace->parts.Back();
+  DPrintf("TraceSwitchPart part=%p pos=%p\n", part, pos);
+  if (part) {
+    // We can get here when we still have space in the current trace part.
+    // The fast-path check in TraceAcquire has false positives in the middle of
+    // the part. Check if we are indeed at the end of the current part or not,
+    // and fill any gaps with NopEvent's.
+    Event *end = &part->events[TracePart::kSize];
+    DCHECK_GE(pos, &part->events[0]);
+    DCHECK_LE(pos, end);
+    if (pos + 1 < end) {
+      if ((reinterpret_cast<uptr>(pos) & TracePart::kAlignment) ==
+          TracePart::kAlignment)
+        *pos++ = NopEvent;
+      *pos++ = NopEvent;
+      DCHECK_LE(pos + 2, end);
+      atomic_store_relaxed(&thr->trace_pos, reinterpret_cast<uptr>(pos));
+      // Ensure we setup trace so that the next TraceAcquire
+      // won't detect trace part end.
+      Event *ev;
+      CHECK(TraceAcquire(thr, &ev));
+      return;
+    }
+    // We are indeed at the end.
+    for (; pos < end; pos++) *pos = NopEvent;
+  }
+#if !SANITIZER_GO
+  if (ctx->after_multithreaded_fork) {
+    // We just need to survive till exec.
+    CHECK(part);
+    atomic_store_relaxed(&thr->trace_pos,
+                         reinterpret_cast<uptr>(&part->events[0]));
+    return;
+  }
+#endif
+  part = new (MmapOrDie(sizeof(TracePart), "TracePart")) TracePart();
+  part->trace = trace;
+  thr->trace_prev_pc = 0;
+  {
+    Lock lock(&trace->mtx);
+    trace->parts.PushBack(part);
+    atomic_store_relaxed(&thr->trace_pos,
+                         reinterpret_cast<uptr>(&part->events[0]));
+  }
+  // Make this part self-sufficient by restoring the current stack
+  // and mutex set in the beginning of the trace.
+  TraceTime(thr);
+  for (uptr *pos = &thr->shadow_stack[0]; pos < thr->shadow_stack_pos; pos++)
+    CHECK(TryTraceFunc(thr, *pos));
+  for (uptr i = 0; i < thr->mset.Size(); i++) {
+    MutexSet::Desc d = thr->mset.Get(i);
+    TraceMutexLock(thr, d.write ? EventType::kLock : EventType::kRLock, 0,
+                   d.addr, d.stack_id);
+  }
+}
+
+}  // namespace v3
+
+void TraceSwitch(ThreadState *thr) {
+#if !SANITIZER_GO
+  if (ctx->after_multithreaded_fork)
+    return;
+#endif
+  thr->nomalloc++;
+  Trace *thr_trace = ThreadTrace(thr->tid);
+  Lock l(&thr_trace->mtx);
+  unsigned trace = (thr->fast_state.epoch() / kTracePartSize) % TraceParts();
+  TraceHeader *hdr = &thr_trace->headers[trace];
+  hdr->epoch0 = thr->fast_state.epoch();
+  ObtainCurrentStack(thr, 0, &hdr->stack0);
+  hdr->mset0 = thr->mset;
+  thr->nomalloc--;
+}
+
+Trace *ThreadTrace(Tid tid) { return (Trace *)GetThreadTraceHeader(tid); }
+
+uptr TraceTopPC(ThreadState *thr) {
+  Event *events = (Event*)GetThreadTrace(thr->tid);
+  uptr pc = events[thr->fast_state.GetTracePos()];
+  return pc;
+}
+
+uptr TraceSize() {
+  return (uptr)(1ull << (kTracePartSizeBits + flags()->history_size + 1));
+}
+
+uptr TraceParts() {
+  return TraceSize() / kTracePartSize;
+}
+
+#if !SANITIZER_GO
+extern "C" void __tsan_trace_switch() {
+  TraceSwitch(cur_thread());
+}
+
+extern "C" void __tsan_report_race() {
+  ReportRace(cur_thread());
+}
+#endif
+
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc) {
+  DPrintf("#%d: ThreadIgnoreBegin\n", thr->tid);
+  thr->ignore_reads_and_writes++;
+  CHECK_GT(thr->ignore_reads_and_writes, 0);
+  thr->fast_state.SetIgnoreBit();
+#if !SANITIZER_GO
+  if (pc && !ctx->after_multithreaded_fork)
+    thr->mop_ignore_set.Add(CurrentStackId(thr, pc));
+#endif
+}
+
+void ThreadIgnoreEnd(ThreadState *thr) {
+  DPrintf("#%d: ThreadIgnoreEnd\n", thr->tid);
+  CHECK_GT(thr->ignore_reads_and_writes, 0);
+  thr->ignore_reads_and_writes--;
+  if (thr->ignore_reads_and_writes == 0) {
+    thr->fast_state.ClearIgnoreBit();
+#if !SANITIZER_GO
+    thr->mop_ignore_set.Reset();
+#endif
+  }
+}
+
+#if !SANITIZER_GO
+extern "C" SANITIZER_INTERFACE_ATTRIBUTE
+uptr __tsan_testonly_shadow_stack_current_size() {
+  ThreadState *thr = cur_thread();
+  return thr->shadow_stack_pos - thr->shadow_stack;
+}
+#endif
+
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc) {
+  DPrintf("#%d: ThreadIgnoreSyncBegin\n", thr->tid);
+  thr->ignore_sync++;
+  CHECK_GT(thr->ignore_sync, 0);
+#if !SANITIZER_GO
+  if (pc && !ctx->after_multithreaded_fork)
+    thr->sync_ignore_set.Add(CurrentStackId(thr, pc));
+#endif
+}
+
+void ThreadIgnoreSyncEnd(ThreadState *thr) {
+  DPrintf("#%d: ThreadIgnoreSyncEnd\n", thr->tid);
+  CHECK_GT(thr->ignore_sync, 0);
+  thr->ignore_sync--;
+#if !SANITIZER_GO
+  if (thr->ignore_sync == 0)
+    thr->sync_ignore_set.Reset();
+#endif
+}
+
+bool MD5Hash::operator==(const MD5Hash &other) const {
+  return hash[0] == other.hash[0] && hash[1] == other.hash[1];
+}
+
+#if SANITIZER_DEBUG
+void build_consistency_debug() {}
+#else
+void build_consistency_release() {}
+#endif
+
+}  // namespace __tsan
+
+#if SANITIZER_CHECK_DEADLOCKS
+namespace __sanitizer {
+using namespace __tsan;
+MutexMeta mutex_meta[] = {
+    {MutexInvalid, "Invalid", {}},
+    {MutexThreadRegistry, "ThreadRegistry", {}},
+    {MutexTypeTrace, "Trace", {}},
+    {MutexTypeReport,
+     "Report",
+     {MutexTypeSyncVar, MutexTypeGlobalProc, MutexTypeTrace}},
+    {MutexTypeSyncVar, "SyncVar", {MutexTypeTrace}},
+    {MutexTypeAnnotations, "Annotations", {}},
+    {MutexTypeAtExit, "AtExit", {MutexTypeSyncVar}},
+    {MutexTypeFired, "Fired", {MutexLeaf}},
+    {MutexTypeRacy, "Racy", {MutexLeaf}},
+    {MutexTypeGlobalProc, "GlobalProc", {}},
+    {MutexTypeInternalAlloc, "InternalAlloc", {MutexLeaf}},
+    {},
+};
+
+void PrintMutexPC(uptr pc) { StackTrace(&pc, 1).Print(); }
+}  // namespace __sanitizer
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h
new file mode 100644
index 0000000000000..c71b27e1cbf58
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl.h
@@ -0,0 +1,796 @@
+//===-- tsan_rtl.h ----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Main internal TSan header file.
+//
+// Ground rules:
+//   - C++ run-time should not be used (static CTORs, RTTI, exceptions, static
+//     function-scope locals)
+//   - All functions/classes/etc reside in namespace __tsan, except for those
+//     declared in tsan_interface.h.
+//   - Platform-specific files should be used instead of ifdefs (*).
+//   - No system headers included in header files (*).
+//   - Platform specific headres included only into platform-specific files (*).
+//
+//  (*) Except when inlining is critical for performance.
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_RTL_H
+#define TSAN_RTL_H
+
+#include "sanitizer_common/sanitizer_allocator.h"
+#include "sanitizer_common/sanitizer_allocator_internal.h"
+#include "sanitizer_common/sanitizer_asm.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
+#include "sanitizer_common/sanitizer_libignore.h"
+#include "sanitizer_common/sanitizer_suppressions.h"
+#include "sanitizer_common/sanitizer_thread_registry.h"
+#include "sanitizer_common/sanitizer_vector.h"
+#include "tsan_clock.h"
+#include "tsan_defs.h"
+#include "tsan_flags.h"
+#include "tsan_ignoreset.h"
+#include "tsan_mman.h"
+#include "tsan_mutexset.h"
+#include "tsan_platform.h"
+#include "tsan_report.h"
+#include "tsan_shadow.h"
+#include "tsan_stack_trace.h"
+#include "tsan_sync.h"
+#include "tsan_trace.h"
+
+#if SANITIZER_WORDSIZE != 64
+# error "ThreadSanitizer is supported only on 64-bit platforms"
+#endif
+
+namespace __tsan {
+
+#if !SANITIZER_GO
+struct MapUnmapCallback;
+#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__)
+
+struct AP32 {
+  static const uptr kSpaceBeg = 0;
+  static const u64 kSpaceSize = SANITIZER_MMAP_RANGE_SIZE;
+  static const uptr kMetadataSize = 0;
+  typedef __sanitizer::CompactSizeClassMap SizeClassMap;
+  static const uptr kRegionSizeLog = 20;
+  using AddressSpaceView = LocalAddressSpaceView;
+  typedef __tsan::MapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+};
+typedef SizeClassAllocator32<AP32> PrimaryAllocator;
+#else
+struct AP64 {  // Allocator64 parameters. Deliberately using a short name.
+#    if defined(__s390x__)
+  typedef MappingS390x Mapping;
+#    else
+  typedef Mapping48AddressSpace Mapping;
+#    endif
+  static const uptr kSpaceBeg = Mapping::kHeapMemBeg;
+  static const uptr kSpaceSize = Mapping::kHeapMemEnd - Mapping::kHeapMemBeg;
+  static const uptr kMetadataSize = 0;
+  typedef DefaultSizeClassMap SizeClassMap;
+  typedef __tsan::MapUnmapCallback MapUnmapCallback;
+  static const uptr kFlags = 0;
+  using AddressSpaceView = LocalAddressSpaceView;
+};
+typedef SizeClassAllocator64<AP64> PrimaryAllocator;
+#endif
+typedef CombinedAllocator<PrimaryAllocator> Allocator;
+typedef Allocator::AllocatorCache AllocatorCache;
+Allocator *allocator();
+#endif
+
+struct ThreadSignalContext;
+
+struct JmpBuf {
+  uptr sp;
+  int int_signal_send;
+  bool in_blocking_func;
+  uptr in_signal_handler;
+  uptr *shadow_stack_pos;
+};
+
+// A Processor represents a physical thread, or a P for Go.
+// It is used to store internal resources like allocate cache, and does not
+// participate in race-detection logic (invisible to end user).
+// In C++ it is tied to an OS thread just like ThreadState, however ideally
+// it should be tied to a CPU (this way we will have fewer allocator caches).
+// In Go it is tied to a P, so there are significantly fewer Processor's than
+// ThreadState's (which are tied to Gs).
+// A ThreadState must be wired with a Processor to handle events.
+struct Processor {
+  ThreadState *thr; // currently wired thread, or nullptr
+#if !SANITIZER_GO
+  AllocatorCache alloc_cache;
+  InternalAllocatorCache internal_alloc_cache;
+#endif
+  DenseSlabAllocCache block_cache;
+  DenseSlabAllocCache sync_cache;
+  DenseSlabAllocCache clock_cache;
+  DDPhysicalThread *dd_pt;
+};
+
+#if !SANITIZER_GO
+// ScopedGlobalProcessor temporary setups a global processor for the current
+// thread, if it does not have one. Intended for interceptors that can run
+// at the very thread end, when we already destroyed the thread processor.
+struct ScopedGlobalProcessor {
+  ScopedGlobalProcessor();
+  ~ScopedGlobalProcessor();
+};
+#endif
+
+// This struct is stored in TLS.
+struct ThreadState {
+  FastState fast_state;
+  // Synch epoch represents the threads's epoch before the last synchronization
+  // action. It allows to reduce number of shadow state updates.
+  // For example, fast_synch_epoch=100, last write to addr X was at epoch=150,
+  // if we are processing write to X from the same thread at epoch=200,
+  // we do nothing, because both writes happen in the same 'synch epoch'.
+  // That is, if another memory access does not race with the former write,
+  // it does not race with the latter as well.
+  // QUESTION: can we can squeeze this into ThreadState::Fast?
+  // E.g. ThreadState::Fast is a 44-bit, 32 are taken by synch_epoch and 12 are
+  // taken by epoch between synchs.
+  // This way we can save one load from tls.
+  u64 fast_synch_epoch;
+  // Technically `current` should be a separate THREADLOCAL variable;
+  // but it is placed here in order to share cache line with previous fields.
+  ThreadState* current;
+  // This is a slow path flag. On fast path, fast_state.GetIgnoreBit() is read.
+  // We do not distinguish beteween ignoring reads and writes
+  // for better performance.
+  int ignore_reads_and_writes;
+  atomic_sint32_t pending_signals;
+  int ignore_sync;
+  int suppress_reports;
+  // Go does not support ignores.
+#if !SANITIZER_GO
+  IgnoreSet mop_ignore_set;
+  IgnoreSet sync_ignore_set;
+#endif
+  uptr *shadow_stack;
+  uptr *shadow_stack_end;
+  uptr *shadow_stack_pos;
+  RawShadow *racy_shadow_addr;
+  RawShadow racy_state[2];
+  MutexSet mset;
+  ThreadClock clock;
+#if !SANITIZER_GO
+  Vector<JmpBuf> jmp_bufs;
+  int ignore_interceptors;
+#endif
+  const Tid tid;
+  const int unique_id;
+  bool in_symbolizer;
+  bool in_ignored_lib;
+  bool is_inited;
+  bool is_dead;
+  bool is_freeing;
+  bool is_vptr_access;
+  const uptr stk_addr;
+  const uptr stk_size;
+  const uptr tls_addr;
+  const uptr tls_size;
+  ThreadContext *tctx;
+
+  DDLogicalThread *dd_lt;
+
+  // Current wired Processor, or nullptr. Required to handle any events.
+  Processor *proc1;
+#if !SANITIZER_GO
+  Processor *proc() { return proc1; }
+#else
+  Processor *proc();
+#endif
+
+  atomic_uintptr_t in_signal_handler;
+  ThreadSignalContext *signal_ctx;
+
+#if !SANITIZER_GO
+  StackID last_sleep_stack_id;
+  ThreadClock last_sleep_clock;
+#endif
+
+  // Set in regions of runtime that must be signal-safe and fork-safe.
+  // If set, malloc must not be called.
+  int nomalloc;
+
+  const ReportDesc *current_report;
+
+  // Current position in tctx->trace.Back()->events (Event*).
+  atomic_uintptr_t trace_pos;
+  // PC of the last memory access, used to compute PC deltas in the trace.
+  uptr trace_prev_pc;
+  Sid sid;
+  Epoch epoch;
+
+  explicit ThreadState(Context *ctx, Tid tid, int unique_id, u64 epoch,
+                       unsigned reuse_count, uptr stk_addr, uptr stk_size,
+                       uptr tls_addr, uptr tls_size);
+} ALIGNED(SANITIZER_CACHE_LINE_SIZE);
+
+#if !SANITIZER_GO
+#if SANITIZER_MAC || SANITIZER_ANDROID
+ThreadState *cur_thread();
+void set_cur_thread(ThreadState *thr);
+void cur_thread_finalize();
+inline ThreadState *cur_thread_init() { return cur_thread(); }
+#  else
+__attribute__((tls_model("initial-exec")))
+extern THREADLOCAL char cur_thread_placeholder[];
+inline ThreadState *cur_thread() {
+  return reinterpret_cast<ThreadState *>(cur_thread_placeholder)->current;
+}
+inline ThreadState *cur_thread_init() {
+  ThreadState *thr = reinterpret_cast<ThreadState *>(cur_thread_placeholder);
+  if (UNLIKELY(!thr->current))
+    thr->current = thr;
+  return thr->current;
+}
+inline void set_cur_thread(ThreadState *thr) {
+  reinterpret_cast<ThreadState *>(cur_thread_placeholder)->current = thr;
+}
+inline void cur_thread_finalize() { }
+#  endif  // SANITIZER_MAC || SANITIZER_ANDROID
+#endif  // SANITIZER_GO
+
+class ThreadContext final : public ThreadContextBase {
+ public:
+  explicit ThreadContext(Tid tid);
+  ~ThreadContext();
+  ThreadState *thr;
+  StackID creation_stack_id;
+  SyncClock sync;
+  // Epoch at which the thread had started.
+  // If we see an event from the thread stamped by an older epoch,
+  // the event is from a dead thread that shared tid with this thread.
+  u64 epoch0;
+  u64 epoch1;
+
+  v3::Trace trace;
+
+  // Override superclass callbacks.
+  void OnDead() override;
+  void OnJoined(void *arg) override;
+  void OnFinished() override;
+  void OnStarted(void *arg) override;
+  void OnCreated(void *arg) override;
+  void OnReset() override;
+  void OnDetached(void *arg) override;
+};
+
+struct RacyStacks {
+  MD5Hash hash[2];
+  bool operator==(const RacyStacks &other) const;
+};
+
+struct RacyAddress {
+  uptr addr_min;
+  uptr addr_max;
+};
+
+struct FiredSuppression {
+  ReportType type;
+  uptr pc_or_addr;
+  Suppression *supp;
+};
+
+struct Context {
+  Context();
+
+  bool initialized;
+#if !SANITIZER_GO
+  bool after_multithreaded_fork;
+#endif
+
+  MetaMap metamap;
+
+  Mutex report_mtx;
+  int nreported;
+  atomic_uint64_t last_symbolize_time_ns;
+
+  void *background_thread;
+  atomic_uint32_t stop_background_thread;
+
+  ThreadRegistry thread_registry;
+
+  Mutex racy_mtx;
+  Vector<RacyStacks> racy_stacks;
+  Vector<RacyAddress> racy_addresses;
+  // Number of fired suppressions may be large enough.
+  Mutex fired_suppressions_mtx;
+  InternalMmapVector<FiredSuppression> fired_suppressions;
+  DDetector *dd;
+
+  ClockAlloc clock_alloc;
+
+  Flags flags;
+  fd_t memprof_fd;
+
+  Mutex slot_mtx;
+};
+
+extern Context *ctx;  // The one and the only global runtime context.
+
+ALWAYS_INLINE Flags *flags() {
+  return &ctx->flags;
+}
+
+struct ScopedIgnoreInterceptors {
+  ScopedIgnoreInterceptors() {
+#if !SANITIZER_GO
+    cur_thread()->ignore_interceptors++;
+#endif
+  }
+
+  ~ScopedIgnoreInterceptors() {
+#if !SANITIZER_GO
+    cur_thread()->ignore_interceptors--;
+#endif
+  }
+};
+
+const char *GetObjectTypeFromTag(uptr tag);
+const char *GetReportHeaderFromTag(uptr tag);
+uptr TagFromShadowStackFrame(uptr pc);
+
+class ScopedReportBase {
+ public:
+  void AddMemoryAccess(uptr addr, uptr external_tag, Shadow s, StackTrace stack,
+                       const MutexSet *mset);
+  void AddStack(StackTrace stack, bool suppressable = false);
+  void AddThread(const ThreadContext *tctx, bool suppressable = false);
+  void AddThread(Tid unique_tid, bool suppressable = false);
+  void AddUniqueTid(Tid unique_tid);
+  void AddMutex(const SyncVar *s);
+  u64 AddMutex(u64 id);
+  void AddLocation(uptr addr, uptr size);
+  void AddSleep(StackID stack_id);
+  void SetCount(int count);
+
+  const ReportDesc *GetReport() const;
+
+ protected:
+  ScopedReportBase(ReportType typ, uptr tag);
+  ~ScopedReportBase();
+
+ private:
+  ReportDesc *rep_;
+  // Symbolizer makes lots of intercepted calls. If we try to process them,
+  // at best it will cause deadlocks on internal mutexes.
+  ScopedIgnoreInterceptors ignore_interceptors_;
+
+  void AddDeadMutex(u64 id);
+
+  ScopedReportBase(const ScopedReportBase &) = delete;
+  void operator=(const ScopedReportBase &) = delete;
+};
+
+class ScopedReport : public ScopedReportBase {
+ public:
+  explicit ScopedReport(ReportType typ, uptr tag = kExternalTagNone);
+  ~ScopedReport();
+
+ private:
+  ScopedErrorReportLock lock_;
+};
+
+bool ShouldReport(ThreadState *thr, ReportType typ);
+ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack);
+void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
+                  MutexSet *mset, uptr *tag = nullptr);
+
+// The stack could look like:
+//   <start> | <main> | <foo> | tag | <bar>
+// This will extract the tag and keep:
+//   <start> | <main> | <foo> | <bar>
+template<typename StackTraceTy>
+void ExtractTagFromStack(StackTraceTy *stack, uptr *tag = nullptr) {
+  if (stack->size < 2) return;
+  uptr possible_tag_pc = stack->trace[stack->size - 2];
+  uptr possible_tag = TagFromShadowStackFrame(possible_tag_pc);
+  if (possible_tag == kExternalTagNone) return;
+  stack->trace_buffer[stack->size - 2] = stack->trace_buffer[stack->size - 1];
+  stack->size -= 1;
+  if (tag) *tag = possible_tag;
+}
+
+template<typename StackTraceTy>
+void ObtainCurrentStack(ThreadState *thr, uptr toppc, StackTraceTy *stack,
+                        uptr *tag = nullptr) {
+  uptr size = thr->shadow_stack_pos - thr->shadow_stack;
+  uptr start = 0;
+  if (size + !!toppc > kStackTraceMax) {
+    start = size + !!toppc - kStackTraceMax;
+    size = kStackTraceMax - !!toppc;
+  }
+  stack->Init(&thr->shadow_stack[start], size, toppc);
+  ExtractTagFromStack(stack, tag);
+}
+
+#define GET_STACK_TRACE_FATAL(thr, pc) \
+  VarSizeStackTrace stack; \
+  ObtainCurrentStack(thr, pc, &stack); \
+  stack.ReverseOrder();
+
+void MapShadow(uptr addr, uptr size);
+void MapThreadTrace(uptr addr, uptr size, const char *name);
+void DontNeedShadowFor(uptr addr, uptr size);
+void UnmapShadow(ThreadState *thr, uptr addr, uptr size);
+void InitializeShadowMemory();
+void InitializeInterceptors();
+void InitializeLibIgnore();
+void InitializeDynamicAnnotations();
+
+void ForkBefore(ThreadState *thr, uptr pc);
+void ForkParentAfter(ThreadState *thr, uptr pc);
+void ForkChildAfter(ThreadState *thr, uptr pc, bool start_thread);
+
+void ReportRace(ThreadState *thr);
+bool OutputReport(ThreadState *thr, const ScopedReport &srep);
+bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace);
+bool IsExpectedReport(uptr addr, uptr size);
+
+#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 1
+# define DPrintf Printf
+#else
+# define DPrintf(...)
+#endif
+
+#if defined(TSAN_DEBUG_OUTPUT) && TSAN_DEBUG_OUTPUT >= 2
+# define DPrintf2 Printf
+#else
+# define DPrintf2(...)
+#endif
+
+StackID CurrentStackId(ThreadState *thr, uptr pc);
+ReportStack *SymbolizeStackId(StackID stack_id);
+void PrintCurrentStack(ThreadState *thr, uptr pc);
+void PrintCurrentStackSlow(uptr pc);  // uses libunwind
+MBlock *JavaHeapBlock(uptr addr, uptr *start);
+
+void Initialize(ThreadState *thr);
+void MaybeSpawnBackgroundThread();
+int Finalize(ThreadState *thr);
+
+void OnUserAlloc(ThreadState *thr, uptr pc, uptr p, uptr sz, bool write);
+void OnUserFree(ThreadState *thr, uptr pc, uptr p, bool write);
+
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic);
+void MemoryAccessImpl(ThreadState *thr, uptr addr,
+    int kAccessSizeLog, bool kAccessIsWrite, bool kIsAtomic,
+    u64 *shadow_mem, Shadow cur);
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr,
+    uptr size, bool is_write);
+void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                           AccessType typ);
+
+const int kSizeLog1 = 0;
+const int kSizeLog2 = 1;
+const int kSizeLog4 = 2;
+const int kSizeLog8 = 3;
+
+ALWAYS_INLINE
+void MemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                  AccessType typ) {
+  int size_log;
+  switch (size) {
+    case 1:
+      size_log = kSizeLog1;
+      break;
+    case 2:
+      size_log = kSizeLog2;
+      break;
+    case 4:
+      size_log = kSizeLog4;
+      break;
+    default:
+      DCHECK_EQ(size, 8);
+      size_log = kSizeLog8;
+      break;
+  }
+  bool is_write = !(typ & kAccessRead);
+  bool is_atomic = typ & kAccessAtomic;
+  if (typ & kAccessVptr)
+    thr->is_vptr_access = true;
+  if (typ & kAccessFree)
+    thr->is_freeing = true;
+  MemoryAccess(thr, pc, addr, size_log, is_write, is_atomic);
+  if (typ & kAccessVptr)
+    thr->is_vptr_access = false;
+  if (typ & kAccessFree)
+    thr->is_freeing = false;
+}
+
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size);
+void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
+                                         uptr size);
+
+void ThreadIgnoreBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreEnd(ThreadState *thr);
+void ThreadIgnoreSyncBegin(ThreadState *thr, uptr pc);
+void ThreadIgnoreSyncEnd(ThreadState *thr);
+
+void FuncEntry(ThreadState *thr, uptr pc);
+void FuncExit(ThreadState *thr);
+
+Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached);
+void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
+                 ThreadType thread_type);
+void ThreadFinish(ThreadState *thr);
+Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid);
+void ThreadJoin(ThreadState *thr, uptr pc, Tid tid);
+void ThreadDetach(ThreadState *thr, uptr pc, Tid tid);
+void ThreadFinalize(ThreadState *thr);
+void ThreadSetName(ThreadState *thr, const char *name);
+int ThreadCount(ThreadState *thr);
+void ProcessPendingSignalsImpl(ThreadState *thr);
+void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid);
+
+Processor *ProcCreate();
+void ProcDestroy(Processor *proc);
+void ProcWire(Processor *proc, ThreadState *thr);
+void ProcUnwire(Processor *proc, ThreadState *thr);
+
+// Note: the parameter is called flagz, because flags is already taken
+// by the global function that returns flags.
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0,
+    int rec = 1);
+int  MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz = 0);
+void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr);
+void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr);
+void MutexRepair(ThreadState *thr, uptr pc, uptr addr);  // call on EOWNERDEAD
+void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr);
+
+void Acquire(ThreadState *thr, uptr pc, uptr addr);
+// AcquireGlobal synchronizes the current thread with all other threads.
+// In terms of happens-before relation, it draws a HB edge from all threads
+// (where they happen to execute right now) to the current thread. We use it to
+// handle Go finalizers. Namely, finalizer goroutine executes AcquireGlobal
+// right before executing finalizers. This provides a coarse, but simple
+// approximation of the actual required synchronization.
+void AcquireGlobal(ThreadState *thr);
+void Release(ThreadState *thr, uptr pc, uptr addr);
+void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr);
+void ReleaseStore(ThreadState *thr, uptr pc, uptr addr);
+void AfterSleep(ThreadState *thr, uptr pc);
+void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c);
+void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c);
+
+// The hacky call uses custom calling convention and an assembly thunk.
+// It is considerably faster that a normal call for the caller
+// if it is not executed (it is intended for slow paths from hot functions).
+// The trick is that the call preserves all registers and the compiler
+// does not treat it as a call.
+// If it does not work for you, use normal call.
+#if !SANITIZER_DEBUG && defined(__x86_64__) && !SANITIZER_MAC
+// The caller may not create the stack frame for itself at all,
+// so we create a reserve stack frame for it (1024b must be enough).
+#define HACKY_CALL(f) \
+  __asm__ __volatile__("sub $1024, %%rsp;" \
+                       CFI_INL_ADJUST_CFA_OFFSET(1024) \
+                       ".hidden " #f "_thunk;" \
+                       "call " #f "_thunk;" \
+                       "add $1024, %%rsp;" \
+                       CFI_INL_ADJUST_CFA_OFFSET(-1024) \
+                       ::: "memory", "cc");
+#else
+#define HACKY_CALL(f) f()
+#endif
+
+void TraceSwitch(ThreadState *thr);
+uptr TraceTopPC(ThreadState *thr);
+uptr TraceSize();
+uptr TraceParts();
+Trace *ThreadTrace(Tid tid);
+
+extern "C" void __tsan_trace_switch();
+void ALWAYS_INLINE TraceAddEvent(ThreadState *thr, FastState fs,
+                                        EventType typ, u64 addr) {
+  if (!kCollectHistory)
+    return;
+  // TraceSwitch accesses shadow_stack, but it's called infrequently,
+  // so we check it here proactively.
+  DCHECK(thr->shadow_stack);
+  DCHECK_GE((int)typ, 0);
+  DCHECK_LE((int)typ, 7);
+  DCHECK_EQ(GetLsb(addr, kEventPCBits), addr);
+  u64 pos = fs.GetTracePos();
+  if (UNLIKELY((pos % kTracePartSize) == 0)) {
+#if !SANITIZER_GO
+    HACKY_CALL(__tsan_trace_switch);
+#else
+    TraceSwitch(thr);
+#endif
+  }
+  Event *trace = (Event*)GetThreadTrace(fs.tid());
+  Event *evp = &trace[pos];
+  Event ev = (u64)addr | ((u64)typ << kEventPCBits);
+  *evp = ev;
+}
+
+#if !SANITIZER_GO
+uptr ALWAYS_INLINE HeapEnd() {
+  return HeapMemEnd() + PrimaryAllocator::AdditionalSize();
+}
+#endif
+
+ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags);
+void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber);
+void FiberSwitch(ThreadState *thr, uptr pc, ThreadState *fiber, unsigned flags);
+
+// These need to match __tsan_switch_to_fiber_* flags defined in
+// tsan_interface.h. See documentation there as well.
+enum FiberSwitchFlags {
+  FiberSwitchFlagNoSync = 1 << 0, // __tsan_switch_to_fiber_no_sync
+};
+
+ALWAYS_INLINE void ProcessPendingSignals(ThreadState *thr) {
+  if (UNLIKELY(atomic_load_relaxed(&thr->pending_signals)))
+    ProcessPendingSignalsImpl(thr);
+}
+
+extern bool is_initialized;
+
+ALWAYS_INLINE
+void LazyInitialize(ThreadState *thr) {
+  // If we can use .preinit_array, assume that __tsan_init
+  // called from .preinit_array initializes runtime before
+  // any instrumented code.
+#if !SANITIZER_CAN_USE_PREINIT_ARRAY
+  if (UNLIKELY(!is_initialized))
+    Initialize(thr);
+#endif
+}
+
+namespace v3 {
+
+void TraceSwitchPart(ThreadState *thr);
+bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
+                  uptr size, AccessType typ, VarSizeStackTrace *pstk,
+                  MutexSet *pmset, uptr *ptag);
+
+template <typename EventT>
+ALWAYS_INLINE WARN_UNUSED_RESULT bool TraceAcquire(ThreadState *thr,
+                                                   EventT **ev) {
+  Event *pos = reinterpret_cast<Event *>(atomic_load_relaxed(&thr->trace_pos));
+#if SANITIZER_DEBUG
+  // TraceSwitch acquires these mutexes,
+  // so we lock them here to detect deadlocks more reliably.
+  { Lock lock(&ctx->slot_mtx); }
+  { Lock lock(&thr->tctx->trace.mtx); }
+  TracePart *current = thr->tctx->trace.parts.Back();
+  if (current) {
+    DCHECK_GE(pos, &current->events[0]);
+    DCHECK_LE(pos, &current->events[TracePart::kSize]);
+  } else {
+    DCHECK_EQ(pos, nullptr);
+  }
+#endif
+  // TracePart is allocated with mmap and is at least 4K aligned.
+  // So the following check is a faster way to check for part end.
+  // It may have false positives in the middle of the trace,
+  // they are filtered out in TraceSwitch.
+  if (UNLIKELY(((uptr)(pos + 1) & TracePart::kAlignment) == 0))
+    return false;
+  *ev = reinterpret_cast<EventT *>(pos);
+  return true;
+}
+
+template <typename EventT>
+ALWAYS_INLINE void TraceRelease(ThreadState *thr, EventT *evp) {
+  DCHECK_LE(evp + 1, &thr->tctx->trace.parts.Back()->events[TracePart::kSize]);
+  atomic_store_relaxed(&thr->trace_pos, (uptr)(evp + 1));
+}
+
+template <typename EventT>
+void TraceEvent(ThreadState *thr, EventT ev) {
+  EventT *evp;
+  if (!TraceAcquire(thr, &evp)) {
+    TraceSwitchPart(thr);
+    UNUSED bool res = TraceAcquire(thr, &evp);
+    DCHECK(res);
+  }
+  *evp = ev;
+  TraceRelease(thr, evp);
+}
+
+ALWAYS_INLINE WARN_UNUSED_RESULT bool TryTraceFunc(ThreadState *thr,
+                                                   uptr pc = 0) {
+  if (!kCollectHistory)
+    return true;
+  EventFunc *ev;
+  if (UNLIKELY(!TraceAcquire(thr, &ev)))
+    return false;
+  ev->is_access = 0;
+  ev->is_func = 1;
+  ev->pc = pc;
+  TraceRelease(thr, ev);
+  return true;
+}
+
+WARN_UNUSED_RESULT
+bool TryTraceMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                          AccessType typ);
+WARN_UNUSED_RESULT
+bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                               AccessType typ);
+void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                            AccessType typ);
+void TraceFunc(ThreadState *thr, uptr pc = 0);
+void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
+                    StackID stk);
+void TraceMutexUnlock(ThreadState *thr, uptr addr);
+void TraceTime(ThreadState *thr);
+
+}  // namespace v3
+
+void GrowShadowStack(ThreadState *thr);
+
+ALWAYS_INLINE
+void FuncEntry(ThreadState *thr, uptr pc) {
+  DPrintf2("#%d: FuncEntry %p\n", (int)thr->fast_state.tid(), (void *)pc);
+  if (kCollectHistory) {
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeFuncEnter, pc);
+  }
+
+  // Shadow stack maintenance can be replaced with
+  // stack unwinding during trace switch (which presumably must be faster).
+  DCHECK_GE(thr->shadow_stack_pos, thr->shadow_stack);
+#if !SANITIZER_GO
+  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#else
+  if (thr->shadow_stack_pos == thr->shadow_stack_end)
+    GrowShadowStack(thr);
+#endif
+  thr->shadow_stack_pos[0] = pc;
+  thr->shadow_stack_pos++;
+}
+
+ALWAYS_INLINE
+void FuncExit(ThreadState *thr) {
+  DPrintf2("#%d: FuncExit\n", (int)thr->fast_state.tid());
+  if (kCollectHistory) {
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeFuncExit, 0);
+  }
+
+  DCHECK_GT(thr->shadow_stack_pos, thr->shadow_stack);
+#if !SANITIZER_GO
+  DCHECK_LT(thr->shadow_stack_pos, thr->shadow_stack_end);
+#endif
+  thr->shadow_stack_pos--;
+}
+
+#if !SANITIZER_GO
+extern void (*on_initialize)(void);
+extern int (*on_finalize)(int);
+#endif
+
+}  // namespace __tsan
+
+#endif  // TSAN_RTL_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_aarch64.S b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_aarch64.S
new file mode 100644
index 0000000000000..e0b4c71dfed9a
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_aarch64.S
@@ -0,0 +1,245 @@
+// The content of this file is AArch64-only:
+#if defined(__aarch64__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+#if defined(__APPLE__)
+.align  2
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long _setjmp$non_lazy_ptr
+_setjmp$non_lazy_ptr:
+.indirect_symbol _setjmp
+.long 0
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long __setjmp$non_lazy_ptr
+__setjmp$non_lazy_ptr:
+.indirect_symbol __setjmp
+.long 0
+
+.section  __DATA,__nl_symbol_ptr,non_lazy_symbol_pointers
+.long _sigsetjmp$non_lazy_ptr
+_sigsetjmp$non_lazy_ptr:
+.indirect_symbol _sigsetjmp
+.long 0
+#endif
+
+#if !defined(__APPLE__)
+.section .text
+#else
+.section __TEXT,__text
+.align 3
+#endif
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+  CFI_STARTPROC
+
+  // Save frame/link register
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save env parameter
+  str     x0, [sp, 16]
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  add     x0, x29, 32
+
+  // call tsan interceptor
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ldr     x0, [sp, 16]
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
+  CFI_RESTORE (30)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc setjmp
+#if !defined(__APPLE__)
+  adrp    x1, :got:_ZN14__interception11real_setjmpE
+  ldr     x1, [x1, #:got_lo12:_ZN14__interception11real_setjmpE]
+  ldr     x1, [x1]
+#else
+  adrp    x1, _setjmp$non_lazy_ptr at page
+  add     x1, x1, _setjmp$non_lazy_ptr at pageoff
+  ldr     x1, [x1]
+#endif
+  br      x1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+
+.comm _ZN14__interception12real__setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  CFI_STARTPROC
+
+  // Save frame/link register
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save env parameter
+  str     x0, [sp, 16]
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  add     x0, x29, 32
+
+  // call tsan interceptor
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env parameter
+  ldr     x0, [sp, 16]
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
+  CFI_RESTORE (30)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc setjmp
+#if !defined(__APPLE__)
+  adrp    x1, :got:_ZN14__interception12real__setjmpE
+  ldr     x1, [x1, #:got_lo12:_ZN14__interception12real__setjmpE]
+  ldr     x1, [x1]
+#else
+  adrp    x1, __setjmp$non_lazy_ptr at page
+  add     x1, x1, __setjmp$non_lazy_ptr at pageoff
+  ldr     x1, [x1]
+#endif
+  br      x1
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+  CFI_STARTPROC
+
+  // Save frame/link register
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save env and savesigs parameter
+  stp     x0, x1, [sp, 16]
+  CFI_OFFSET (0, -16)
+  CFI_OFFSET (1, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  add     x0, x29, 32
+
+  // call tsan interceptor
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env and savesigs parameter
+  ldp     x0, x1, [sp, 16]
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
+  CFI_RESTORE (30)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc sigsetjmp
+#if !defined(__APPLE__)
+  adrp    x2, :got:_ZN14__interception14real_sigsetjmpE
+  ldr     x2, [x2, #:got_lo12:_ZN14__interception14real_sigsetjmpE]
+  ldr     x2, [x2]
+#else
+  adrp    x2, _sigsetjmp$non_lazy_ptr at page
+  add     x2, x2, _sigsetjmp$non_lazy_ptr at pageoff
+  ldr     x2, [x2]
+#endif
+  br      x2
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+
+#if !defined(__APPLE__)
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  CFI_STARTPROC
+
+  // Save frame/link register
+  stp     x29, x30, [sp, -32]!
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (29, -32)
+  CFI_OFFSET (30, -24)
+
+  // Adjust the SP for previous frame
+  add     x29, sp, 0
+  CFI_DEF_CFA_REGISTER (29)
+
+  // Save env and savesigs parameter
+  stp     x0, x1, [sp, 16]
+  CFI_OFFSET (0, -16)
+  CFI_OFFSET (1, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  add     x0, x29, 32
+
+  // call tsan interceptor
+  bl      ASM_SYMBOL(__tsan_setjmp)
+
+  // Restore env and savesigs parameter
+  ldp     x0, x1, [sp, 16]
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldp     x29, x30, [sp], 32
+  CFI_RESTORE (29)
+  CFI_RESTORE (30)
+  CFI_DEF_CFA (31, 0)
+
+  // tail jump to libc __sigsetjmp
+#if !defined(__APPLE__)
+  adrp    x2, :got:_ZN14__interception16real___sigsetjmpE
+  ldr     x2, [x2, #:got_lo12:_ZN14__interception16real___sigsetjmpE]
+  ldr     x2, [x2]
+#else
+  adrp    x2, ASM_SYMBOL(__sigsetjmp)@page
+  add     x2, x2, ASM_SYMBOL(__sigsetjmp)@pageoff
+#endif
+  br      x2
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+#endif
+
+NO_EXEC_STACK_DIRECTIVE
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp
new file mode 100644
index 0000000000000..7365fdaa30384
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_access.cpp
@@ -0,0 +1,604 @@
+//===-- tsan_rtl_access.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Definitions of memory access and function entry/exit entry points.
+//===----------------------------------------------------------------------===//
+
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+namespace v3 {
+
+ALWAYS_INLINE USED bool TryTraceMemoryAccess(ThreadState *thr, uptr pc,
+                                             uptr addr, uptr size,
+                                             AccessType typ) {
+  DCHECK(size == 1 || size == 2 || size == 4 || size == 8);
+  if (!kCollectHistory)
+    return true;
+  EventAccess *ev;
+  if (UNLIKELY(!TraceAcquire(thr, &ev)))
+    return false;
+  u64 size_log = size == 1 ? 0 : size == 2 ? 1 : size == 4 ? 2 : 3;
+  uptr pc_delta = pc - thr->trace_prev_pc + (1 << (EventAccess::kPCBits - 1));
+  thr->trace_prev_pc = pc;
+  if (LIKELY(pc_delta < (1 << EventAccess::kPCBits))) {
+    ev->is_access = 1;
+    ev->is_read = !!(typ & kAccessRead);
+    ev->is_atomic = !!(typ & kAccessAtomic);
+    ev->size_log = size_log;
+    ev->pc_delta = pc_delta;
+    DCHECK_EQ(ev->pc_delta, pc_delta);
+    ev->addr = CompressAddr(addr);
+    TraceRelease(thr, ev);
+    return true;
+  }
+  auto *evex = reinterpret_cast<EventAccessExt *>(ev);
+  evex->is_access = 0;
+  evex->is_func = 0;
+  evex->type = EventType::kAccessExt;
+  evex->is_read = !!(typ & kAccessRead);
+  evex->is_atomic = !!(typ & kAccessAtomic);
+  evex->size_log = size_log;
+  evex->addr = CompressAddr(addr);
+  evex->pc = pc;
+  TraceRelease(thr, evex);
+  return true;
+}
+
+ALWAYS_INLINE USED bool TryTraceMemoryAccessRange(ThreadState *thr, uptr pc,
+                                                  uptr addr, uptr size,
+                                                  AccessType typ) {
+  if (!kCollectHistory)
+    return true;
+  EventAccessRange *ev;
+  if (UNLIKELY(!TraceAcquire(thr, &ev)))
+    return false;
+  thr->trace_prev_pc = pc;
+  ev->is_access = 0;
+  ev->is_func = 0;
+  ev->type = EventType::kAccessRange;
+  ev->is_read = !!(typ & kAccessRead);
+  ev->is_free = !!(typ & kAccessFree);
+  ev->size_lo = size;
+  ev->pc = CompressAddr(pc);
+  ev->addr = CompressAddr(addr);
+  ev->size_hi = size >> EventAccessRange::kSizeLoBits;
+  TraceRelease(thr, ev);
+  return true;
+}
+
+void TraceMemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                            AccessType typ) {
+  if (LIKELY(TryTraceMemoryAccessRange(thr, pc, addr, size, typ)))
+    return;
+  TraceSwitchPart(thr);
+  UNUSED bool res = TryTraceMemoryAccessRange(thr, pc, addr, size, typ);
+  DCHECK(res);
+}
+
+void TraceFunc(ThreadState *thr, uptr pc) {
+  if (LIKELY(TryTraceFunc(thr, pc)))
+    return;
+  TraceSwitchPart(thr);
+  UNUSED bool res = TryTraceFunc(thr, pc);
+  DCHECK(res);
+}
+
+void TraceMutexLock(ThreadState *thr, EventType type, uptr pc, uptr addr,
+                    StackID stk) {
+  DCHECK(type == EventType::kLock || type == EventType::kRLock);
+  if (!kCollectHistory)
+    return;
+  EventLock ev;
+  ev.is_access = 0;
+  ev.is_func = 0;
+  ev.type = type;
+  ev.pc = CompressAddr(pc);
+  ev.stack_lo = stk;
+  ev.stack_hi = stk >> EventLock::kStackIDLoBits;
+  ev._ = 0;
+  ev.addr = CompressAddr(addr);
+  TraceEvent(thr, ev);
+}
+
+void TraceMutexUnlock(ThreadState *thr, uptr addr) {
+  if (!kCollectHistory)
+    return;
+  EventUnlock ev;
+  ev.is_access = 0;
+  ev.is_func = 0;
+  ev.type = EventType::kUnlock;
+  ev._ = 0;
+  ev.addr = CompressAddr(addr);
+  TraceEvent(thr, ev);
+}
+
+void TraceTime(ThreadState *thr) {
+  if (!kCollectHistory)
+    return;
+  EventTime ev;
+  ev.is_access = 0;
+  ev.is_func = 0;
+  ev.type = EventType::kTime;
+  ev.sid = static_cast<u64>(thr->sid);
+  ev.epoch = static_cast<u64>(thr->epoch);
+  ev._ = 0;
+  TraceEvent(thr, ev);
+}
+
+}  // namespace v3
+
+ALWAYS_INLINE
+Shadow LoadShadow(u64 *p) {
+  u64 raw = atomic_load((atomic_uint64_t *)p, memory_order_relaxed);
+  return Shadow(raw);
+}
+
+ALWAYS_INLINE
+void StoreShadow(u64 *sp, u64 s) {
+  atomic_store((atomic_uint64_t *)sp, s, memory_order_relaxed);
+}
+
+ALWAYS_INLINE
+void StoreIfNotYetStored(u64 *sp, u64 *s) {
+  StoreShadow(sp, *s);
+  *s = 0;
+}
+
+extern "C" void __tsan_report_race();
+
+ALWAYS_INLINE
+void HandleRace(ThreadState *thr, u64 *shadow_mem, Shadow cur, Shadow old) {
+  thr->racy_state[0] = cur.raw();
+  thr->racy_state[1] = old.raw();
+  thr->racy_shadow_addr = shadow_mem;
+#if !SANITIZER_GO
+  HACKY_CALL(__tsan_report_race);
+#else
+  ReportRace(thr);
+#endif
+}
+
+static inline bool HappensBefore(Shadow old, ThreadState *thr) {
+  return thr->clock.get(old.TidWithIgnore()) >= old.epoch();
+}
+
+ALWAYS_INLINE
+void MemoryAccessImpl1(ThreadState *thr, uptr addr, int kAccessSizeLog,
+                       bool kAccessIsWrite, bool kIsAtomic, u64 *shadow_mem,
+                       Shadow cur) {
+  // This potentially can live in an MMX/SSE scratch register.
+  // The required intrinsics are:
+  // __m128i _mm_move_epi64(__m128i*);
+  // _mm_storel_epi64(u64*, __m128i);
+  u64 store_word = cur.raw();
+  bool stored = false;
+
+  // scan all the shadow values and dispatch to 4 categories:
+  // same, replace, candidate and race (see comments below).
+  // we consider only 3 cases regarding access sizes:
+  // equal, intersect and not intersect. initially I considered
+  // larger and smaller as well, it allowed to replace some
+  // 'candidates' with 'same' or 'replace', but I think
+  // it's just not worth it (performance- and complexity-wise).
+
+  Shadow old(0);
+
+  // It release mode we manually unroll the loop,
+  // because empirically gcc generates better code this way.
+  // However, we can't afford unrolling in debug mode, because the function
+  // consumes almost 4K of stack. Gtest gives only 4K of stack to death test
+  // threads, which is not enough for the unrolled loop.
+#if SANITIZER_DEBUG
+  for (int idx = 0; idx < 4; idx++) {
+#  include "tsan_update_shadow_word.inc"
+  }
+#else
+  int idx = 0;
+#  include "tsan_update_shadow_word.inc"
+  idx = 1;
+  if (stored) {
+#  include "tsan_update_shadow_word.inc"
+  } else {
+#  include "tsan_update_shadow_word.inc"
+  }
+  idx = 2;
+  if (stored) {
+#  include "tsan_update_shadow_word.inc"
+  } else {
+#  include "tsan_update_shadow_word.inc"
+  }
+  idx = 3;
+  if (stored) {
+#  include "tsan_update_shadow_word.inc"
+  } else {
+#  include "tsan_update_shadow_word.inc"
+  }
+#endif
+
+  // we did not find any races and had already stored
+  // the current access info, so we are done
+  if (LIKELY(stored))
+    return;
+  // choose a random candidate slot and replace it
+  StoreShadow(shadow_mem + (cur.epoch() % kShadowCnt), store_word);
+  return;
+RACE:
+  HandleRace(thr, shadow_mem, cur, old);
+  return;
+}
+
+void UnalignedMemoryAccess(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                           AccessType typ) {
+  DCHECK(!(typ & kAccessAtomic));
+  const bool kAccessIsWrite = !(typ & kAccessRead);
+  const bool kIsAtomic = false;
+  while (size) {
+    int size1 = 1;
+    int kAccessSizeLog = kSizeLog1;
+    if (size >= 8 && (addr & ~7) == ((addr + 7) & ~7)) {
+      size1 = 8;
+      kAccessSizeLog = kSizeLog8;
+    } else if (size >= 4 && (addr & ~7) == ((addr + 3) & ~7)) {
+      size1 = 4;
+      kAccessSizeLog = kSizeLog4;
+    } else if (size >= 2 && (addr & ~7) == ((addr + 1) & ~7)) {
+      size1 = 2;
+      kAccessSizeLog = kSizeLog2;
+    }
+    MemoryAccess(thr, pc, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic);
+    addr += size1;
+    size -= size1;
+  }
+}
+
+ALWAYS_INLINE
+bool ContainsSameAccessSlow(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  Shadow cur(a);
+  for (uptr i = 0; i < kShadowCnt; i++) {
+    Shadow old(LoadShadow(&s[i]));
+    if (Shadow::Addr0AndSizeAreEqual(cur, old) &&
+        old.TidWithIgnore() == cur.TidWithIgnore() &&
+        old.epoch() > sync_epoch && old.IsAtomic() == cur.IsAtomic() &&
+        old.IsRead() <= cur.IsRead())
+      return true;
+  }
+  return false;
+}
+
+#if TSAN_VECTORIZE
+#  define SHUF(v0, v1, i0, i1, i2, i3)                    \
+    _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v0), \
+                                    _mm_castsi128_ps(v1), \
+                                    (i0)*1 + (i1)*4 + (i2)*16 + (i3)*64))
+ALWAYS_INLINE
+bool ContainsSameAccessFast(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+  // This is an optimized version of ContainsSameAccessSlow.
+  // load current access into access[0:63]
+  const m128 access = _mm_cvtsi64_si128(a);
+  // duplicate high part of access in addr0:
+  // addr0[0:31]        = access[32:63]
+  // addr0[32:63]       = access[32:63]
+  // addr0[64:95]       = access[32:63]
+  // addr0[96:127]      = access[32:63]
+  const m128 addr0 = SHUF(access, access, 1, 1, 1, 1);
+  // load 4 shadow slots
+  const m128 shadow0 = _mm_load_si128((__m128i *)s);
+  const m128 shadow1 = _mm_load_si128((__m128i *)s + 1);
+  // load high parts of 4 shadow slots into addr_vect:
+  // addr_vect[0:31]    = shadow0[32:63]
+  // addr_vect[32:63]   = shadow0[96:127]
+  // addr_vect[64:95]   = shadow1[32:63]
+  // addr_vect[96:127]  = shadow1[96:127]
+  m128 addr_vect = SHUF(shadow0, shadow1, 1, 3, 1, 3);
+  if (!is_write) {
+    // set IsRead bit in addr_vect
+    const m128 rw_mask1 = _mm_cvtsi64_si128(1 << 15);
+    const m128 rw_mask = SHUF(rw_mask1, rw_mask1, 0, 0, 0, 0);
+    addr_vect = _mm_or_si128(addr_vect, rw_mask);
+  }
+  // addr0 == addr_vect?
+  const m128 addr_res = _mm_cmpeq_epi32(addr0, addr_vect);
+  // epoch1[0:63]       = sync_epoch
+  const m128 epoch1 = _mm_cvtsi64_si128(sync_epoch);
+  // epoch[0:31]        = sync_epoch[0:31]
+  // epoch[32:63]       = sync_epoch[0:31]
+  // epoch[64:95]       = sync_epoch[0:31]
+  // epoch[96:127]      = sync_epoch[0:31]
+  const m128 epoch = SHUF(epoch1, epoch1, 0, 0, 0, 0);
+  // load low parts of shadow cell epochs into epoch_vect:
+  // epoch_vect[0:31]   = shadow0[0:31]
+  // epoch_vect[32:63]  = shadow0[64:95]
+  // epoch_vect[64:95]  = shadow1[0:31]
+  // epoch_vect[96:127] = shadow1[64:95]
+  const m128 epoch_vect = SHUF(shadow0, shadow1, 0, 2, 0, 2);
+  // epoch_vect >= sync_epoch?
+  const m128 epoch_res = _mm_cmpgt_epi32(epoch_vect, epoch);
+  // addr_res & epoch_res
+  const m128 res = _mm_and_si128(addr_res, epoch_res);
+  // mask[0] = res[7]
+  // mask[1] = res[15]
+  // ...
+  // mask[15] = res[127]
+  const int mask = _mm_movemask_epi8(res);
+  return mask != 0;
+}
+#endif
+
+ALWAYS_INLINE
+bool ContainsSameAccess(u64 *s, u64 a, u64 sync_epoch, bool is_write) {
+#if TSAN_VECTORIZE
+  bool res = ContainsSameAccessFast(s, a, sync_epoch, is_write);
+  // NOTE: this check can fail if the shadow is concurrently mutated
+  // by other threads. But it still can be useful if you modify
+  // ContainsSameAccessFast and want to ensure that it's not completely broken.
+  // DCHECK_EQ(res, ContainsSameAccessSlow(s, a, sync_epoch, is_write));
+  return res;
+#else
+  return ContainsSameAccessSlow(s, a, sync_epoch, is_write);
+#endif
+}
+
+ALWAYS_INLINE USED void MemoryAccess(ThreadState *thr, uptr pc, uptr addr,
+                                     int kAccessSizeLog, bool kAccessIsWrite,
+                                     bool kIsAtomic) {
+  RawShadow *shadow_mem = MemToShadow(addr);
+  DPrintf2(
+      "#%d: MemoryAccess: @%p %p size=%d"
+      " is_write=%d shadow_mem=%p {%zx, %zx, %zx, %zx}\n",
+      (int)thr->fast_state.tid(), (void *)pc, (void *)addr,
+      (int)(1 << kAccessSizeLog), kAccessIsWrite, shadow_mem,
+      (uptr)shadow_mem[0], (uptr)shadow_mem[1], (uptr)shadow_mem[2],
+      (uptr)shadow_mem[3]);
+#if SANITIZER_DEBUG
+  if (!IsAppMem(addr)) {
+    Printf("Access to non app mem %zx\n", addr);
+    DCHECK(IsAppMem(addr));
+  }
+  if (!IsShadowMem(shadow_mem)) {
+    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+    DCHECK(IsShadowMem(shadow_mem));
+  }
+#endif
+
+  if (!SANITIZER_GO && !kAccessIsWrite && *shadow_mem == kShadowRodata) {
+    // Access to .rodata section, no races here.
+    // Measurements show that it can be 10-20% of all memory accesses.
+    return;
+  }
+
+  FastState fast_state = thr->fast_state;
+  if (UNLIKELY(fast_state.GetIgnoreBit())) {
+    return;
+  }
+
+  Shadow cur(fast_state);
+  cur.SetAddr0AndSizeLog(addr & 7, kAccessSizeLog);
+  cur.SetWrite(kAccessIsWrite);
+  cur.SetAtomic(kIsAtomic);
+
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+                                kAccessIsWrite))) {
+    return;
+  }
+
+  if (kCollectHistory) {
+    fast_state.IncrementEpoch();
+    thr->fast_state = fast_state;
+    TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+    cur.IncrementEpoch();
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+                    shadow_mem, cur);
+}
+
+// Called by MemoryAccessRange in tsan_rtl_thread.cpp
+ALWAYS_INLINE USED void MemoryAccessImpl(ThreadState *thr, uptr addr,
+                                         int kAccessSizeLog,
+                                         bool kAccessIsWrite, bool kIsAtomic,
+                                         u64 *shadow_mem, Shadow cur) {
+  if (LIKELY(ContainsSameAccess(shadow_mem, cur.raw(), thr->fast_synch_epoch,
+                                kAccessIsWrite))) {
+    return;
+  }
+
+  MemoryAccessImpl1(thr, addr, kAccessSizeLog, kAccessIsWrite, kIsAtomic,
+                    shadow_mem, cur);
+}
+
+static void MemoryRangeSet(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                           u64 val) {
+  (void)thr;
+  (void)pc;
+  if (size == 0)
+    return;
+  // FIXME: fix me.
+  uptr offset = addr % kShadowCell;
+  if (offset) {
+    offset = kShadowCell - offset;
+    if (size <= offset)
+      return;
+    addr += offset;
+    size -= offset;
+  }
+  DCHECK_EQ(addr % 8, 0);
+  // If a user passes some insane arguments (memset(0)),
+  // let it just crash as usual.
+  if (!IsAppMem(addr) || !IsAppMem(addr + size - 1))
+    return;
+  // Don't want to touch lots of shadow memory.
+  // If a program maps 10MB stack, there is no need reset the whole range.
+  size = (size + (kShadowCell - 1)) & ~(kShadowCell - 1);
+  // UnmapOrDie/MmapFixedNoReserve does not work on Windows.
+  if (SANITIZER_WINDOWS || size < common_flags()->clear_shadow_mmap_threshold) {
+    RawShadow *p = MemToShadow(addr);
+    CHECK(IsShadowMem(p));
+    CHECK(IsShadowMem(p + size * kShadowCnt / kShadowCell - 1));
+    // FIXME: may overwrite a part outside the region
+    for (uptr i = 0; i < size / kShadowCell * kShadowCnt;) {
+      p[i++] = val;
+      for (uptr j = 1; j < kShadowCnt; j++) p[i++] = 0;
+    }
+  } else {
+    // The region is big, reset only beginning and end.
+    const uptr kPageSize = GetPageSizeCached();
+    RawShadow *begin = MemToShadow(addr);
+    RawShadow *end = begin + size / kShadowCell * kShadowCnt;
+    RawShadow *p = begin;
+    // Set at least first kPageSize/2 to page boundary.
+    while ((p < begin + kPageSize / kShadowSize / 2) || ((uptr)p % kPageSize)) {
+      *p++ = val;
+      for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+    }
+    // Reset middle part.
+    RawShadow *p1 = p;
+    p = RoundDown(end, kPageSize);
+    if (!MmapFixedSuperNoReserve((uptr)p1, (uptr)p - (uptr)p1))
+      Die();
+    // Set the ending.
+    while (p < end) {
+      *p++ = val;
+      for (uptr j = 1; j < kShadowCnt; j++) *p++ = 0;
+    }
+  }
+}
+
+void MemoryResetRange(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  MemoryRangeSet(thr, pc, addr, size, 0);
+}
+
+void MemoryRangeFreed(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  // Processing more than 1k (4k of shadow) is expensive,
+  // can cause excessive memory consumption (user does not necessary touch
+  // the whole range) and most likely unnecessary.
+  if (size > 1024)
+    size = 1024;
+  CHECK_EQ(thr->is_freeing, false);
+  thr->is_freeing = true;
+  MemoryAccessRange(thr, pc, addr, size, true);
+  thr->is_freeing = false;
+  if (kCollectHistory) {
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+  }
+  Shadow s(thr->fast_state);
+  s.ClearIgnoreBit();
+  s.MarkAsFreed();
+  s.SetWrite(true);
+  s.SetAddr0AndSizeLog(0, 3);
+  MemoryRangeSet(thr, pc, addr, size, s.raw());
+}
+
+void MemoryRangeImitateWrite(ThreadState *thr, uptr pc, uptr addr, uptr size) {
+  if (kCollectHistory) {
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, pc);
+  }
+  Shadow s(thr->fast_state);
+  s.ClearIgnoreBit();
+  s.SetWrite(true);
+  s.SetAddr0AndSizeLog(0, 3);
+  MemoryRangeSet(thr, pc, addr, size, s.raw());
+}
+
+void MemoryRangeImitateWriteOrResetRange(ThreadState *thr, uptr pc, uptr addr,
+                                         uptr size) {
+  if (thr->ignore_reads_and_writes == 0)
+    MemoryRangeImitateWrite(thr, pc, addr, size);
+  else
+    MemoryResetRange(thr, pc, addr, size);
+}
+
+void MemoryAccessRange(ThreadState *thr, uptr pc, uptr addr, uptr size,
+                       bool is_write) {
+  if (size == 0)
+    return;
+
+  RawShadow *shadow_mem = MemToShadow(addr);
+  DPrintf2("#%d: MemoryAccessRange: @%p %p size=%d is_write=%d\n", thr->tid,
+           (void *)pc, (void *)addr, (int)size, is_write);
+
+#if SANITIZER_DEBUG
+  if (!IsAppMem(addr)) {
+    Printf("Access to non app mem %zx\n", addr);
+    DCHECK(IsAppMem(addr));
+  }
+  if (!IsAppMem(addr + size - 1)) {
+    Printf("Access to non app mem %zx\n", addr + size - 1);
+    DCHECK(IsAppMem(addr + size - 1));
+  }
+  if (!IsShadowMem(shadow_mem)) {
+    Printf("Bad shadow addr %p (%zx)\n", shadow_mem, addr);
+    DCHECK(IsShadowMem(shadow_mem));
+  }
+  if (!IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1)) {
+    Printf("Bad shadow addr %p (%zx)\n", shadow_mem + size * kShadowCnt / 8 - 1,
+           addr + size - 1);
+    DCHECK(IsShadowMem(shadow_mem + size * kShadowCnt / 8 - 1));
+  }
+#endif
+
+  if (*shadow_mem == kShadowRodata) {
+    DCHECK(!is_write);
+    // Access to .rodata section, no races here.
+    // Measurements show that it can be 10-20% of all memory accesses.
+    return;
+  }
+
+  FastState fast_state = thr->fast_state;
+  if (fast_state.GetIgnoreBit())
+    return;
+
+  fast_state.IncrementEpoch();
+  thr->fast_state = fast_state;
+  TraceAddEvent(thr, fast_state, EventTypeMop, pc);
+
+  bool unaligned = (addr % kShadowCell) != 0;
+
+  // Handle unaligned beginning, if any.
+  for (; addr % kShadowCell && size; addr++, size--) {
+    int const kAccessSizeLog = 0;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+                     cur);
+  }
+  if (unaligned)
+    shadow_mem += kShadowCnt;
+  // Handle middle part, if any.
+  for (; size >= kShadowCell; addr += kShadowCell, size -= kShadowCell) {
+    int const kAccessSizeLog = 3;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(0, kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+                     cur);
+    shadow_mem += kShadowCnt;
+  }
+  // Handle ending, if any.
+  for (; size; addr++, size--) {
+    int const kAccessSizeLog = 0;
+    Shadow cur(fast_state);
+    cur.SetWrite(is_write);
+    cur.SetAddr0AndSizeLog(addr & (kShadowCell - 1), kAccessSizeLog);
+    MemoryAccessImpl(thr, addr, kAccessSizeLog, is_write, false, shadow_mem,
+                     cur);
+  }
+}
+
+}  // namespace __tsan
+
+#if !SANITIZER_GO
+// Must be included in this file to make sure everything is inlined.
+#  include "tsan_interface.inc"
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_amd64.S b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_amd64.S
new file mode 100644
index 0000000000000..632b19d181580
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_amd64.S
@@ -0,0 +1,446 @@
+// The content of this file is x86_64-only:
+#if defined(__x86_64__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+#if !defined(__APPLE__)
+.section .text
+#else
+.section __TEXT,__text
+#endif
+
+ASM_HIDDEN(__tsan_trace_switch)
+.globl ASM_SYMBOL(__tsan_trace_switch_thunk)
+ASM_SYMBOL(__tsan_trace_switch_thunk):
+  CFI_STARTPROC
+  _CET_ENDBR
+  # Save scratch registers.
+  push %rax
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rax, 0)
+  push %rcx
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rcx, 0)
+  push %rdx
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdx, 0)
+  push %rsi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rsi, 0)
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  push %r8
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r8, 0)
+  push %r9
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r9, 0)
+  push %r10
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r10, 0)
+  push %r11
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r11, 0)
+  # All XMM registers are caller-saved.
+  sub $0x100, %rsp
+  CFI_ADJUST_CFA_OFFSET(0x100)
+  vmovdqu %xmm0, 0x0(%rsp)
+  vmovdqu %xmm1, 0x10(%rsp)
+  vmovdqu %xmm2, 0x20(%rsp)
+  vmovdqu %xmm3, 0x30(%rsp)
+  vmovdqu %xmm4, 0x40(%rsp)
+  vmovdqu %xmm5, 0x50(%rsp)
+  vmovdqu %xmm6, 0x60(%rsp)
+  vmovdqu %xmm7, 0x70(%rsp)
+  vmovdqu %xmm8, 0x80(%rsp)
+  vmovdqu %xmm9, 0x90(%rsp)
+  vmovdqu %xmm10, 0xa0(%rsp)
+  vmovdqu %xmm11, 0xb0(%rsp)
+  vmovdqu %xmm12, 0xc0(%rsp)
+  vmovdqu %xmm13, 0xd0(%rsp)
+  vmovdqu %xmm14, 0xe0(%rsp)
+  vmovdqu %xmm15, 0xf0(%rsp)
+  # Align stack frame.
+  push %rbx  # non-scratch
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rbx, 0)
+  mov %rsp, %rbx  # save current rsp
+  CFI_DEF_CFA_REGISTER(%rbx)
+  shr $4, %rsp  # clear 4 lsb, align to 16
+  shl $4, %rsp
+
+  call ASM_SYMBOL(__tsan_trace_switch)
+
+  # Unalign stack frame back.
+  mov %rbx, %rsp  # restore the original rsp
+  CFI_DEF_CFA_REGISTER(%rsp)
+  pop %rbx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  # Restore scratch registers.
+  vmovdqu 0x0(%rsp), %xmm0
+  vmovdqu 0x10(%rsp), %xmm1
+  vmovdqu 0x20(%rsp), %xmm2
+  vmovdqu 0x30(%rsp), %xmm3
+  vmovdqu 0x40(%rsp), %xmm4
+  vmovdqu 0x50(%rsp), %xmm5
+  vmovdqu 0x60(%rsp), %xmm6
+  vmovdqu 0x70(%rsp), %xmm7
+  vmovdqu 0x80(%rsp), %xmm8
+  vmovdqu 0x90(%rsp), %xmm9
+  vmovdqu 0xa0(%rsp), %xmm10
+  vmovdqu 0xb0(%rsp), %xmm11
+  vmovdqu 0xc0(%rsp), %xmm12
+  vmovdqu 0xd0(%rsp), %xmm13
+  vmovdqu 0xe0(%rsp), %xmm14
+  vmovdqu 0xf0(%rsp), %xmm15
+  add $0x100, %rsp
+  CFI_ADJUST_CFA_OFFSET(-0x100)
+  pop %r11
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r10
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r9
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r8
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rsi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rdx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rcx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rax
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rax)
+  CFI_RESTORE(%rbx)
+  CFI_RESTORE(%rcx)
+  CFI_RESTORE(%rdx)
+  CFI_RESTORE(%rsi)
+  CFI_RESTORE(%rdi)
+  CFI_RESTORE(%r8)
+  CFI_RESTORE(%r9)
+  CFI_RESTORE(%r10)
+  CFI_RESTORE(%r11)
+  ret
+  CFI_ENDPROC
+
+ASM_HIDDEN(__tsan_report_race)
+.globl ASM_SYMBOL(__tsan_report_race_thunk)
+ASM_SYMBOL(__tsan_report_race_thunk):
+  CFI_STARTPROC
+  _CET_ENDBR
+  # Save scratch registers.
+  push %rax
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rax, 0)
+  push %rcx
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rcx, 0)
+  push %rdx
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdx, 0)
+  push %rsi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rsi, 0)
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  push %r8
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r8, 0)
+  push %r9
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r9, 0)
+  push %r10
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r10, 0)
+  push %r11
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%r11, 0)
+  # All XMM registers are caller-saved.
+  sub $0x100, %rsp
+  CFI_ADJUST_CFA_OFFSET(0x100)
+  vmovdqu %xmm0, 0x0(%rsp)
+  vmovdqu %xmm1, 0x10(%rsp)
+  vmovdqu %xmm2, 0x20(%rsp)
+  vmovdqu %xmm3, 0x30(%rsp)
+  vmovdqu %xmm4, 0x40(%rsp)
+  vmovdqu %xmm5, 0x50(%rsp)
+  vmovdqu %xmm6, 0x60(%rsp)
+  vmovdqu %xmm7, 0x70(%rsp)
+  vmovdqu %xmm8, 0x80(%rsp)
+  vmovdqu %xmm9, 0x90(%rsp)
+  vmovdqu %xmm10, 0xa0(%rsp)
+  vmovdqu %xmm11, 0xb0(%rsp)
+  vmovdqu %xmm12, 0xc0(%rsp)
+  vmovdqu %xmm13, 0xd0(%rsp)
+  vmovdqu %xmm14, 0xe0(%rsp)
+  vmovdqu %xmm15, 0xf0(%rsp)
+  # Align stack frame.
+  push %rbx  # non-scratch
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rbx, 0)
+  mov %rsp, %rbx  # save current rsp
+  CFI_DEF_CFA_REGISTER(%rbx)
+  shr $4, %rsp  # clear 4 lsb, align to 16
+  shl $4, %rsp
+
+  call ASM_SYMBOL(__tsan_report_race)
+
+  # Unalign stack frame back.
+  mov %rbx, %rsp  # restore the original rsp
+  CFI_DEF_CFA_REGISTER(%rsp)
+  pop %rbx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  # Restore scratch registers.
+  vmovdqu 0x0(%rsp), %xmm0
+  vmovdqu 0x10(%rsp), %xmm1
+  vmovdqu 0x20(%rsp), %xmm2
+  vmovdqu 0x30(%rsp), %xmm3
+  vmovdqu 0x40(%rsp), %xmm4
+  vmovdqu 0x50(%rsp), %xmm5
+  vmovdqu 0x60(%rsp), %xmm6
+  vmovdqu 0x70(%rsp), %xmm7
+  vmovdqu 0x80(%rsp), %xmm8
+  vmovdqu 0x90(%rsp), %xmm9
+  vmovdqu 0xa0(%rsp), %xmm10
+  vmovdqu 0xb0(%rsp), %xmm11
+  vmovdqu 0xc0(%rsp), %xmm12
+  vmovdqu 0xd0(%rsp), %xmm13
+  vmovdqu 0xe0(%rsp), %xmm14
+  vmovdqu 0xf0(%rsp), %xmm15
+  add $0x100, %rsp
+  CFI_ADJUST_CFA_OFFSET(-0x100)
+  pop %r11
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r10
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r9
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %r8
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rsi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rdx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rcx
+  CFI_ADJUST_CFA_OFFSET(-8)
+  pop %rax
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rax)
+  CFI_RESTORE(%rbx)
+  CFI_RESTORE(%rcx)
+  CFI_RESTORE(%rdx)
+  CFI_RESTORE(%rsi)
+  CFI_RESTORE(%rdi)
+  CFI_RESTORE(%r8)
+  CFI_RESTORE(%r9)
+  CFI_RESTORE(%r10)
+  CFI_RESTORE(%r11)
+  ret
+  CFI_ENDPROC
+
+ASM_HIDDEN(__tsan_setjmp)
+#if defined(__NetBSD__)
+.comm _ZN14__interception15real___setjmp14E,8,8
+#elif !defined(__APPLE__)
+.comm _ZN14__interception11real_setjmpE,8,8
+#endif
+#if defined(__NetBSD__)
+.globl ASM_SYMBOL_INTERCEPTOR(__setjmp14)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__setjmp14))
+ASM_SYMBOL_INTERCEPTOR(__setjmp14):
+#else
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+#endif
+  CFI_STARTPROC
+  _CET_ENDBR
+  // save env parameter
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  // obtain SP, store in %rdi, first argument to `void __tsan_setjmp(uptr sp)`
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+  lea 8(%rsp), %rdi
+#elif defined(__linux__) || defined(__APPLE__)
+  lea 16(%rsp), %rdi
+#else
+# error "Unknown platform"
+#endif
+  // call tsan interceptor
+  call ASM_SYMBOL(__tsan_setjmp)
+  // restore env parameter
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rdi)
+  // tail jump to libc setjmp
+  movl $0, %eax
+#if defined(__NetBSD__)
+  movq _ZN14__interception15real___setjmp14E at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+#elif !defined(__APPLE__)
+  movq _ZN14__interception11real_setjmpE at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+#else
+  jmp ASM_SYMBOL(setjmp)
+#endif
+  CFI_ENDPROC
+#if defined(__NetBSD__)
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__setjmp14))
+#else
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+#endif
+
+.comm _ZN14__interception12real__setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  CFI_STARTPROC
+  _CET_ENDBR
+  // save env parameter
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  // obtain SP, store in %rdi, first argument to `void __tsan_setjmp(uptr sp)`
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+  lea 8(%rsp), %rdi
+#elif defined(__linux__) || defined(__APPLE__)
+  lea 16(%rsp), %rdi
+#else
+# error "Unknown platform"
+#endif
+  // call tsan interceptor
+  call ASM_SYMBOL(__tsan_setjmp)
+  // restore env parameter
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rdi)
+  // tail jump to libc setjmp
+  movl $0, %eax
+#if !defined(__APPLE__)
+  movq _ZN14__interception12real__setjmpE at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+#else
+  jmp ASM_SYMBOL(_setjmp)
+#endif
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+#if defined(__NetBSD__)
+.comm _ZN14__interception18real___sigsetjmp14E,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp14)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp14))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp14):
+#else
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+#endif
+  CFI_STARTPROC
+  _CET_ENDBR
+  // save env parameter
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  // save savesigs parameter
+  push %rsi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rsi, 0)
+  // align stack frame
+  sub $8, %rsp
+  CFI_ADJUST_CFA_OFFSET(8)
+  // obtain SP, store in %rdi, first argument to `void __tsan_setjmp(uptr sp)`
+#if defined(__FreeBSD__) || defined(__NetBSD__)
+  lea 24(%rsp), %rdi
+#elif defined(__linux__) || defined(__APPLE__)
+  lea 32(%rsp), %rdi
+#else
+# error "Unknown platform"
+#endif
+  // call tsan interceptor
+  call ASM_SYMBOL(__tsan_setjmp)
+  // unalign stack frame
+  add $8, %rsp
+  CFI_ADJUST_CFA_OFFSET(-8)
+  // restore savesigs parameter
+  pop %rsi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rsi)
+  // restore env parameter
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rdi)
+  // tail jump to libc sigsetjmp
+  movl $0, %eax
+#if defined(__NetBSD__)
+  movq _ZN14__interception18real___sigsetjmp14E at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+#elif !defined(__APPLE__)
+  movq _ZN14__interception14real_sigsetjmpE at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+#else
+  jmp ASM_SYMBOL(sigsetjmp)
+#endif
+  CFI_ENDPROC
+#if defined(__NetBSD__)
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp14))
+#else
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+#endif
+
+#if !defined(__APPLE__) && !defined(__NetBSD__)
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  CFI_STARTPROC
+  _CET_ENDBR
+  // save env parameter
+  push %rdi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rdi, 0)
+  // save savesigs parameter
+  push %rsi
+  CFI_ADJUST_CFA_OFFSET(8)
+  CFI_REL_OFFSET(%rsi, 0)
+  // align stack frame
+  sub $8, %rsp
+  CFI_ADJUST_CFA_OFFSET(8)
+  // obtain SP, store in %rdi, first argument to `void __tsan_setjmp(uptr sp)`
+#if defined(__FreeBSD__)
+  lea 24(%rsp), %rdi
+#else
+  lea 32(%rsp), %rdi
+#endif
+  // call tsan interceptor
+  call ASM_SYMBOL(__tsan_setjmp)
+  // unalign stack frame
+  add $8, %rsp
+  CFI_ADJUST_CFA_OFFSET(-8)
+  // restore savesigs parameter
+  pop %rsi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rsi)
+  // restore env parameter
+  pop %rdi
+  CFI_ADJUST_CFA_OFFSET(-8)
+  CFI_RESTORE(%rdi)
+  // tail jump to libc sigsetjmp
+  movl $0, %eax
+  movq _ZN14__interception16real___sigsetjmpE at GOTPCREL(%rip), %rdx
+  jmp *(%rdx)
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+#endif  // !defined(__APPLE__) && !defined(__NetBSD__)
+
+NO_EXEC_STACK_DIRECTIVE
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mips64.S b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mips64.S
new file mode 100644
index 0000000000000..d0f7a3f9af989
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mips64.S
@@ -0,0 +1,214 @@
+.section .text
+.set noreorder
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl setjmp
+.type setjmp, @function
+setjmp:
+
+  // save env parameters
+  daddiu $sp,$sp,-40
+  sd $s0,32($sp)
+  sd $ra,24($sp)
+  sd $fp,16($sp)
+  sd $gp,8($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(setjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(setjmp)))
+  move $s0,$gp
+
+  // save jmp_buf
+  sd $a0,0($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,40
+
+  // restore jmp_buf
+  ld $a0,0($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc setjmp to t9
+  dla $t9,(_ZN14__interception11real_setjmpE) 
+
+  // restore env parameters
+  ld $gp,8($sp)
+  ld $fp,16($sp)
+  ld $ra,24($sp)
+  ld $s0,32($sp)
+  daddiu $sp,$sp,40
+
+  // tail jump to libc setjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size setjmp, .-setjmp
+
+.hidden __tsan_setjmp
+.globl _setjmp
+.comm _ZN14__interception12real__setjmpE,8,8
+.type _setjmp, @function
+_setjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-40
+  sd $s0,32($sp)
+  sd $ra,24($sp)
+  sd $fp,16($sp)
+  sd $gp,8($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(_setjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(_setjmp)))
+  move $s0,$gp
+
+  // save jmp_buf
+  sd $a0,0($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,40
+
+  // restore jmp_buf
+  ld $a0,0($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc _setjmp to t9
+  dla $t9,(_ZN14__interception12real__setjmpE)
+
+  // restore env parameters
+  ld $gp,8($sp)
+  ld $fp,16($sp)
+  ld $ra,24($sp)
+  ld $s0,32($sp)
+  daddiu $sp,$sp,40
+
+  // tail jump to libc _setjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size _setjmp, .-_setjmp
+
+.hidden __tsan_setjmp
+.globl sigsetjmp
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.type sigsetjmp, @function
+sigsetjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-48
+  sd $s0,40($sp)
+  sd $ra,32($sp)
+  sd $fp,24($sp)
+  sd $gp,16($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(sigsetjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(sigsetjmp)))
+  move $s0,$gp
+
+  // save jmp_buf and savesig
+  sd $a0,0($sp)
+  sd $a1,8($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,48
+
+  // restore jmp_buf and savesig
+  ld $a0,0($sp)
+  ld $a1,8($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer of libc sigsetjmp to t9
+  dla $t9,(_ZN14__interception14real_sigsetjmpE) 
+
+  // restore env parameters
+  ld $gp,16($sp)
+  ld $fp,24($sp)
+  ld $ra,32($sp)
+  ld $s0,40($sp)
+  daddiu $sp,$sp,48
+
+  // tail jump to libc sigsetjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size sigsetjmp, .-sigsetjmp
+
+.hidden __tsan_setjmp
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl __sigsetjmp
+.type __sigsetjmp, @function
+__sigsetjmp:
+
+  // Save env parameters
+  daddiu $sp,$sp,-48
+  sd $s0,40($sp)
+  sd $ra,32($sp)
+  sd $fp,24($sp)
+  sd $gp,16($sp)
+
+  // calculate and save pointer to GOT
+  lui $gp,%hi(%neg(%gp_rel(__sigsetjmp)))
+  daddu $gp,$gp,$t9
+  daddiu $gp,$gp,%lo(%neg(%gp_rel(__sigsetjmp)))
+  move $s0,$gp
+
+  // save jmp_buf and savesig
+  sd $a0,0($sp)
+  sd $a1,8($sp)
+
+  // obtain $sp
+  dadd $a0,$zero,$sp
+
+  // call tsan interceptor
+  jal __tsan_setjmp
+  daddiu $a1,$a0,48
+
+  // restore jmp_buf and savesig
+  ld $a0,0($sp)
+  ld $a1,8($sp)
+
+  // restore gp
+  move $gp,$s0
+
+  // load pointer to libc __sigsetjmp in t9
+  dla $t9,(_ZN14__interception16real___sigsetjmpE)
+
+  // restore env parameters
+  ld $gp,16($sp)
+  ld $fp,24($sp)
+  ld $ra,32($sp)
+  ld $s0,40($sp)
+  daddiu $sp,$sp,48
+
+  // tail jump to libc __sigsetjmp
+  ld $t9,0($t9)
+  jr $t9
+  nop
+
+.size __sigsetjmp, .-__sigsetjmp

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mutex.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mutex.cpp
new file mode 100644
index 0000000000000..7d6b41116aa6f
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_mutex.cpp
@@ -0,0 +1,555 @@
+//===-- tsan_rtl_mutex.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include <sanitizer_common/sanitizer_deadlock_detector_interface.h>
+#include <sanitizer_common/sanitizer_stackdepot.h>
+
+#include "tsan_rtl.h"
+#include "tsan_flags.h"
+#include "tsan_sync.h"
+#include "tsan_report.h"
+#include "tsan_symbolize.h"
+#include "tsan_platform.h"
+
+namespace __tsan {
+
+void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r);
+
+struct Callback final : public DDCallback {
+  ThreadState *thr;
+  uptr pc;
+
+  Callback(ThreadState *thr, uptr pc)
+      : thr(thr)
+      , pc(pc) {
+    DDCallback::pt = thr->proc()->dd_pt;
+    DDCallback::lt = thr->dd_lt;
+  }
+
+  StackID Unwind() override { return CurrentStackId(thr, pc); }
+  int UniqueTid() override { return thr->unique_id; }
+};
+
+void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s) {
+  Callback cb(thr, pc);
+  ctx->dd->MutexInit(&cb, &s->dd);
+  s->dd.ctx = s->GetId();
+}
+
+static void ReportMutexMisuse(ThreadState *thr, uptr pc, ReportType typ,
+    uptr addr, u64 mid) {
+  // In Go, these misuses are either impossible, or detected by std lib,
+  // or false positives (e.g. unlock in a 
diff erent thread).
+  if (SANITIZER_GO)
+    return;
+  if (!ShouldReport(thr, typ))
+    return;
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ScopedReport rep(typ);
+  rep.AddMutex(mid);
+  VarSizeStackTrace trace;
+  ObtainCurrentStack(thr, pc, &trace);
+  rep.AddStack(trace, true);
+  rep.AddLocation(addr, 1);
+  OutputReport(thr, rep);
+}
+
+void MutexCreate(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexCreate %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (!(flagz & MutexFlagLinkerInit) && IsAppMem(addr)) {
+    CHECK(!thr->is_freeing);
+    thr->is_freeing = true;
+    MemoryAccess(thr, pc, addr, 1, kAccessWrite);
+    thr->is_freeing = false;
+  }
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+  Lock l(&s->mtx);
+  s->SetFlags(flagz & MutexCreationFlagMask);
+  // Save stack in the case the sync object was created before as atomic.
+  if (!SANITIZER_GO && s->creation_stack_id == 0)
+    s->creation_stack_id = CurrentStackId(thr, pc);
+}
+
+void MutexDestroy(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexDestroy %zx\n", thr->tid, addr);
+  bool unlock_locked = false;
+  u64 mid = 0;
+  u64 last_lock = 0;
+  {
+    SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+    if (s == 0)
+      return;
+    Lock l(&s->mtx);
+    if ((flagz & MutexFlagLinkerInit) || s->IsFlagSet(MutexFlagLinkerInit) ||
+        ((flagz & MutexFlagNotStatic) && !s->IsFlagSet(MutexFlagNotStatic))) {
+      // Destroy is no-op for linker-initialized mutexes.
+      return;
+    }
+    if (common_flags()->detect_deadlocks) {
+      Callback cb(thr, pc);
+      ctx->dd->MutexDestroy(&cb, &s->dd);
+      ctx->dd->MutexInit(&cb, &s->dd);
+    }
+    if (flags()->report_destroy_locked && s->owner_tid != kInvalidTid &&
+        !s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
+      unlock_locked = true;
+    }
+    mid = s->GetId();
+    last_lock = s->last_lock;
+    if (!unlock_locked)
+      s->Reset(thr->proc());  // must not reset it before the report is printed
+  }
+  if (unlock_locked && ShouldReport(thr, ReportTypeMutexDestroyLocked)) {
+    ThreadRegistryLock l(&ctx->thread_registry);
+    ScopedReport rep(ReportTypeMutexDestroyLocked);
+    rep.AddMutex(mid);
+    VarSizeStackTrace trace;
+    ObtainCurrentStack(thr, pc, &trace);
+    rep.AddStack(trace, true);
+    FastState last(last_lock);
+    RestoreStack(last.tid(), last.epoch(), &trace, 0);
+    rep.AddStack(trace, true);
+    rep.AddLocation(addr, 1);
+    OutputReport(thr, rep);
+
+    SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+    if (s != 0) {
+      Lock l(&s->mtx);
+      s->Reset(thr->proc());
+    }
+  }
+  thr->mset.Remove(mid);
+  // Imitate a memory write to catch unlock-destroy races.
+  // Do this outside of sync mutex, because it can report a race which locks
+  // sync mutexes.
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessWrite | kAccessFree);
+  // s will be destroyed and freed in MetaMap::FreeBlock.
+}
+
+void MutexPreLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPreLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    {
+      ReadLock l(&s->mtx);
+      s->UpdateFlags(flagz);
+      if (s->owner_tid != thr->tid) {
+        Callback cb(thr, pc);
+        ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+      }
+    }
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexPostLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz, int rec) {
+  DPrintf("#%d: MutexPostLock %zx flag=0x%x rec=%d\n",
+      thr->tid, addr, flagz, rec);
+  if (flagz & MutexFlagRecursiveLock)
+    CHECK_GT(rec, 0);
+  else
+    rec = 1;
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+  u64 mid = 0;
+  bool pre_lock = false;
+  bool first = false;
+  bool report_double_lock = false;
+  {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    Lock l(&s->mtx);
+    s->UpdateFlags(flagz);
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeLock, s->GetId());
+    if (s->owner_tid == kInvalidTid) {
+      CHECK_EQ(s->recursion, 0);
+      s->owner_tid = thr->tid;
+      s->last_lock = thr->fast_state.raw();
+    } else if (s->owner_tid == thr->tid) {
+      CHECK_GT(s->recursion, 0);
+    } else if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
+      report_double_lock = true;
+    }
+    first = s->recursion == 0;
+    s->recursion += rec;
+    if (first) {
+      AcquireImpl(thr, pc, &s->clock);
+      AcquireImpl(thr, pc, &s->read_clock);
+    } else if (!s->IsFlagSet(MutexFlagWriteReentrant)) {
+    }
+    thr->mset.Add(s->GetId(), true, thr->fast_state.epoch());
+    if (first && common_flags()->detect_deadlocks) {
+      pre_lock =
+          (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
+      Callback cb(thr, pc);
+      if (pre_lock)
+        ctx->dd->MutexBeforeLock(&cb, &s->dd, true);
+      ctx->dd->MutexAfterLock(&cb, &s->dd, true, flagz & MutexFlagTryLock);
+    }
+    mid = s->GetId();
+  }
+  if (report_double_lock)
+    ReportMutexMisuse(thr, pc, ReportTypeMutexDoubleLock, addr, mid);
+  if (first && pre_lock && common_flags()->detect_deadlocks) {
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+int MutexUnlock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexUnlock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+  u64 mid = 0;
+  bool report_bad_unlock = false;
+  int rec = 0;
+  {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    Lock l(&s->mtx);
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
+    if (!SANITIZER_GO && (s->recursion == 0 || s->owner_tid != thr->tid)) {
+      if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+        s->SetFlags(MutexFlagBroken);
+        report_bad_unlock = true;
+      }
+    } else {
+      rec = (flagz & MutexFlagRecursiveUnlock) ? s->recursion : 1;
+      s->recursion -= rec;
+      if (s->recursion == 0) {
+        s->owner_tid = kInvalidTid;
+        ReleaseStoreImpl(thr, pc, &s->clock);
+      } else {
+      }
+    }
+    thr->mset.Del(s->GetId(), true);
+    if (common_flags()->detect_deadlocks && s->recursion == 0 &&
+        !report_bad_unlock) {
+      Callback cb(thr, pc);
+      ctx->dd->MutexBeforeUnlock(&cb, &s->dd, true);
+    }
+    mid = s->GetId();
+  }
+  if (report_bad_unlock)
+    ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
+  if (common_flags()->detect_deadlocks && !report_bad_unlock) {
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+  return rec;
+}
+
+void MutexPreReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPreReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (!(flagz & MutexFlagTryLock) && common_flags()->detect_deadlocks) {
+    {
+      SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+      ReadLock l(&s->mtx);
+      s->UpdateFlags(flagz);
+      Callback cb(thr, pc);
+      ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+    }
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexPostReadLock(ThreadState *thr, uptr pc, uptr addr, u32 flagz) {
+  DPrintf("#%d: MutexPostReadLock %zx flagz=0x%x\n", thr->tid, addr, flagz);
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+  u64 mid = 0;
+  bool report_bad_lock = false;
+  bool pre_lock = false;
+  {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    ReadLock l(&s->mtx);
+    s->UpdateFlags(flagz);
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeRLock, s->GetId());
+    if (s->owner_tid != kInvalidTid) {
+      if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+        s->SetFlags(MutexFlagBroken);
+        report_bad_lock = true;
+      }
+    }
+    AcquireImpl(thr, pc, &s->clock);
+    s->last_lock = thr->fast_state.raw();
+    thr->mset.Add(s->GetId(), false, thr->fast_state.epoch());
+    if (common_flags()->detect_deadlocks) {
+      pre_lock =
+          (flagz & MutexFlagDoPreLockOnPostLock) && !(flagz & MutexFlagTryLock);
+      Callback cb(thr, pc);
+      if (pre_lock)
+        ctx->dd->MutexBeforeLock(&cb, &s->dd, false);
+      ctx->dd->MutexAfterLock(&cb, &s->dd, false, flagz & MutexFlagTryLock);
+    }
+    mid = s->GetId();
+  }
+  if (report_bad_lock)
+    ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadLock, addr, mid);
+  if (pre_lock  && common_flags()->detect_deadlocks) {
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexReadUnlock(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: MutexReadUnlock %zx\n", thr->tid, addr);
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+  u64 mid = 0;
+  bool report_bad_unlock = false;
+  {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    Lock l(&s->mtx);
+    thr->fast_state.IncrementEpoch();
+    TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
+    if (s->owner_tid != kInvalidTid) {
+      if (flags()->report_mutex_bugs && !s->IsFlagSet(MutexFlagBroken)) {
+        s->SetFlags(MutexFlagBroken);
+        report_bad_unlock = true;
+      }
+    }
+    ReleaseImpl(thr, pc, &s->read_clock);
+    if (common_flags()->detect_deadlocks && s->recursion == 0) {
+      Callback cb(thr, pc);
+      ctx->dd->MutexBeforeUnlock(&cb, &s->dd, false);
+    }
+    mid = s->GetId();
+  }
+  thr->mset.Del(mid, false);
+  if (report_bad_unlock)
+    ReportMutexMisuse(thr, pc, ReportTypeMutexBadReadUnlock, addr, mid);
+  if (common_flags()->detect_deadlocks) {
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexReadOrWriteUnlock(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: MutexReadOrWriteUnlock %zx\n", thr->tid, addr);
+  if (IsAppMem(addr))
+    MemoryAccess(thr, pc, addr, 1, kAccessRead | kAccessAtomic);
+  u64 mid = 0;
+  bool report_bad_unlock = false;
+  {
+    SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+    Lock l(&s->mtx);
+    bool write = true;
+    if (s->owner_tid == kInvalidTid) {
+      // Seems to be read unlock.
+      write = false;
+      thr->fast_state.IncrementEpoch();
+      TraceAddEvent(thr, thr->fast_state, EventTypeRUnlock, s->GetId());
+      ReleaseImpl(thr, pc, &s->read_clock);
+    } else if (s->owner_tid == thr->tid) {
+      // Seems to be write unlock.
+      thr->fast_state.IncrementEpoch();
+      TraceAddEvent(thr, thr->fast_state, EventTypeUnlock, s->GetId());
+      CHECK_GT(s->recursion, 0);
+      s->recursion--;
+      if (s->recursion == 0) {
+        s->owner_tid = kInvalidTid;
+        ReleaseStoreImpl(thr, pc, &s->clock);
+      } else {
+      }
+    } else if (!s->IsFlagSet(MutexFlagBroken)) {
+      s->SetFlags(MutexFlagBroken);
+      report_bad_unlock = true;
+    }
+    thr->mset.Del(s->GetId(), write);
+    if (common_flags()->detect_deadlocks && s->recursion == 0) {
+      Callback cb(thr, pc);
+      ctx->dd->MutexBeforeUnlock(&cb, &s->dd, write);
+    }
+    mid = s->GetId();
+  }
+  if (report_bad_unlock)
+    ReportMutexMisuse(thr, pc, ReportTypeMutexBadUnlock, addr, mid);
+  if (common_flags()->detect_deadlocks) {
+    Callback cb(thr, pc);
+    ReportDeadlock(thr, pc, ctx->dd->GetReport(&cb));
+  }
+}
+
+void MutexRepair(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: MutexRepair %zx\n", thr->tid, addr);
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+  Lock l(&s->mtx);
+  s->owner_tid = kInvalidTid;
+  s->recursion = 0;
+}
+
+void MutexInvalidAccess(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: MutexInvalidAccess %zx\n", thr->tid, addr);
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, true);
+  ReportMutexMisuse(thr, pc, ReportTypeMutexInvalidAccess, addr, s->GetId());
+}
+
+void Acquire(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: Acquire %zx\n", thr->tid, addr);
+  if (thr->ignore_sync)
+    return;
+  SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+  if (!s)
+    return;
+  ReadLock l(&s->mtx);
+  AcquireImpl(thr, pc, &s->clock);
+}
+
+static void UpdateClockCallback(ThreadContextBase *tctx_base, void *arg) {
+  ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
+  ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+  u64 epoch = tctx->epoch1;
+  if (tctx->status == ThreadStatusRunning) {
+    epoch = tctx->thr->fast_state.epoch();
+    tctx->thr->clock.NoteGlobalAcquire(epoch);
+  }
+  thr->clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
+}
+
+void AcquireGlobal(ThreadState *thr) {
+  DPrintf("#%d: AcquireGlobal\n", thr->tid);
+  if (thr->ignore_sync)
+    return;
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateClockCallback, thr);
+}
+
+void ReleaseStoreAcquire(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: ReleaseStoreAcquire %zx\n", thr->tid, addr);
+  if (thr->ignore_sync)
+    return;
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+  Lock l(&s->mtx);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  ReleaseStoreAcquireImpl(thr, pc, &s->clock);
+}
+
+void Release(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: Release %zx\n", thr->tid, addr);
+  if (thr->ignore_sync)
+    return;
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+  Lock l(&s->mtx);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  ReleaseImpl(thr, pc, &s->clock);
+}
+
+void ReleaseStore(ThreadState *thr, uptr pc, uptr addr) {
+  DPrintf("#%d: ReleaseStore %zx\n", thr->tid, addr);
+  if (thr->ignore_sync)
+    return;
+  SyncVar *s = ctx->metamap.GetSyncOrCreate(thr, pc, addr, false);
+  Lock l(&s->mtx);
+  thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+  ReleaseStoreImpl(thr, pc, &s->clock);
+}
+
+#if !SANITIZER_GO
+static void UpdateSleepClockCallback(ThreadContextBase *tctx_base, void *arg) {
+  ThreadState *thr = reinterpret_cast<ThreadState*>(arg);
+  ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+  u64 epoch = tctx->epoch1;
+  if (tctx->status == ThreadStatusRunning)
+    epoch = tctx->thr->fast_state.epoch();
+  thr->last_sleep_clock.set(&thr->proc()->clock_cache, tctx->tid, epoch);
+}
+
+void AfterSleep(ThreadState *thr, uptr pc) {
+  DPrintf("#%d: AfterSleep\n", thr->tid);
+  if (thr->ignore_sync)
+    return;
+  thr->last_sleep_stack_id = CurrentStackId(thr, pc);
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ctx->thread_registry.RunCallbackForEachThreadLocked(UpdateSleepClockCallback,
+                                                      thr);
+}
+#endif
+
+void AcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->clock.acquire(&thr->proc()->clock_cache, c);
+}
+
+void ReleaseStoreAcquireImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.releaseStoreAcquire(&thr->proc()->clock_cache, c);
+}
+
+void ReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.release(&thr->proc()->clock_cache, c);
+}
+
+void ReleaseStoreImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.ReleaseStore(&thr->proc()->clock_cache, c);
+}
+
+void AcquireReleaseImpl(ThreadState *thr, uptr pc, SyncClock *c) {
+  if (thr->ignore_sync)
+    return;
+  thr->clock.set(thr->fast_state.epoch());
+  thr->fast_synch_epoch = thr->fast_state.epoch();
+  thr->clock.acq_rel(&thr->proc()->clock_cache, c);
+}
+
+void ReportDeadlock(ThreadState *thr, uptr pc, DDReport *r) {
+  if (r == 0 || !ShouldReport(thr, ReportTypeDeadlock))
+    return;
+  ThreadRegistryLock l(&ctx->thread_registry);
+  ScopedReport rep(ReportTypeDeadlock);
+  for (int i = 0; i < r->n; i++) {
+    rep.AddMutex(r->loop[i].mtx_ctx0);
+    rep.AddUniqueTid((int)r->loop[i].thr_ctx);
+    rep.AddThread((int)r->loop[i].thr_ctx);
+  }
+  uptr dummy_pc = 0x42;
+  for (int i = 0; i < r->n; i++) {
+    for (int j = 0; j < (flags()->second_deadlock_stack ? 2 : 1); j++) {
+      u32 stk = r->loop[i].stk[j];
+      if (stk && stk != 0xffffffff) {
+        rep.AddStack(StackDepotGet(stk), true);
+      } else {
+        // Sometimes we fail to extract the stack trace (FIXME: investigate),
+        // but we should still produce some stack trace in the report.
+        rep.AddStack(StackTrace(&dummy_pc, 1), true);
+      }
+    }
+  }
+  OutputReport(thr, rep);
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_ppc64.S b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_ppc64.S
new file mode 100644
index 0000000000000..8285e21aa1ec7
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_ppc64.S
@@ -0,0 +1,288 @@
+#include "tsan_ppc_regs.h"
+
+        .section .text
+        .hidden __tsan_setjmp
+        .globl _setjmp
+        .type _setjmp, @function
+        .align 4
+#if _CALL_ELF == 2
+_setjmp:
+#else
+	.section ".opd","aw"
+	.align 3
+_setjmp:
+	.quad   .L._setjmp,.TOC. at tocbase,0
+	.previous
+#endif
+.L._setjmp:
+        mflr    r0
+        stdu    r1,-48(r1)
+        std     r2,24(r1)
+        std     r3,32(r1)
+        std     r0,40(r1)
+        // r3 is the original stack pointer.
+        addi    r3,r1,48
+        // r4 is the mangled stack pointer (see glibc)
+        ld      r4,-28696(r13)
+        xor     r4,r3,r4
+        // Materialize a TOC in case we were called from libc.
+        // For big-endian, we load the TOC from the OPD.  For little-
+        // endian, we use the .TOC. symbol to find it.
+        nop
+        bcl     20,31,0f
+0:
+        mflr    r2
+#if _CALL_ELF == 2
+        addis   r2,r2,.TOC.-0b at ha
+        addi    r2,r2,.TOC.-0b at l
+#else
+        addis   r2,r2,_setjmp-0b at ha
+        addi    r2,r2,_setjmp-0b at l
+        ld      r2,8(r2)
+#endif
+        // Call the interceptor.
+        bl      __tsan_setjmp
+        nop
+        // Restore regs needed for setjmp.
+        ld      r3,32(r1)
+        ld      r0,40(r1)
+        // Emulate the real setjmp function.  We do this because we can't
+        // perform a sibcall:  The real setjmp function trashes the TOC
+        // pointer, and with a sibcall we have no way to restore it.
+        // This way we can make sure our caller's stack pointer and
+        // link register are saved correctly in the jmpbuf.
+        ld      r6,-28696(r13)
+        addi    r5,r1,48  // original stack ptr of caller
+        xor     r5,r6,r5
+        std     r5,0(r3)  // mangled stack ptr of caller
+        ld      r5,24(r1)
+        std     r5,8(r3)  // caller's saved TOC pointer
+        xor     r0,r6,r0
+        std     r0,16(r3) // caller's mangled return address
+        mfcr    r0
+        // Nonvolatiles.
+        std     r14,24(r3)
+        stfd    f14,176(r3)
+        stw     r0,172(r3) // CR
+        std     r15,32(r3)
+        stfd    f15,184(r3)
+        std     r16,40(r3)
+        stfd    f16,192(r3)
+        std     r17,48(r3)
+        stfd    f17,200(r3)
+        std     r18,56(r3)
+        stfd    f18,208(r3)
+        std     r19,64(r3)
+        stfd    f19,216(r3)
+        std     r20,72(r3)
+        stfd    f20,224(r3)
+        std     r21,80(r3)
+        stfd    f21,232(r3)
+        std     r22,88(r3)
+        stfd    f22,240(r3)
+        std     r23,96(r3)
+        stfd    f23,248(r3)
+        std     r24,104(r3)
+        stfd    f24,256(r3)
+        std     r25,112(r3)
+        stfd    f25,264(r3)
+        std     r26,120(r3)
+        stfd    f26,272(r3)
+        std     r27,128(r3)
+        stfd    f27,280(r3)
+        std     r28,136(r3)
+        stfd    f28,288(r3)
+        std     r29,144(r3)
+        stfd    f29,296(r3)
+        std     r30,152(r3)
+        stfd    f30,304(r3)
+        std     r31,160(r3)
+        stfd    f31,312(r3)
+        addi    r5,r3,320
+        mfspr   r0,256
+        stw     r0,168(r3)  // VRSAVE
+        addi    r6,r5,16
+        stvx    v20,0,r5
+        addi    r5,r5,32
+        stvx    v21,0,r6
+        addi    r6,r6,32
+        stvx    v22,0,r5
+        addi    r5,r5,32
+        stvx    v23,0,r6
+        addi    r6,r6,32
+        stvx    v24,0,r5
+        addi    r5,r5,32
+        stvx    v25,0,r6
+        addi    r6,r6,32
+        stvx    v26,0,r5
+        addi    r5,r5,32
+        stvx    v27,0,r6
+        addi    r6,r6,32
+        stvx    v28,0,r5
+        addi    r5,r5,32
+        stvx    v29,0,r6
+        addi    r6,r6,32
+        stvx    v30,0,r5
+        stvx    v31,0,r6
+        // Clear the "mask-saved" slot.
+        li      r4,0
+        stw     r4,512(r3)
+        // Restore TOC, LR, and stack and return to caller.
+        ld      r2,24(r1)
+        ld      r0,40(r1)
+        addi    r1,r1,48
+        li      r3,0  // This is the setjmp return path
+        mtlr    r0
+        blr
+        .size _setjmp, .-.L._setjmp
+
+        .globl setjmp
+        .type setjmp, @function
+        .align 4
+setjmp:
+        b       _setjmp
+        .size setjmp, .-setjmp
+
+        // sigsetjmp is like setjmp, except that the mask in r4 needs
+        // to be saved at offset 512 of the jump buffer.
+        .globl __sigsetjmp
+        .type __sigsetjmp, @function
+        .align 4
+#if _CALL_ELF == 2
+__sigsetjmp:
+#else
+	.section ".opd","aw"
+	.align 3
+__sigsetjmp:
+	.quad   .L.__sigsetjmp,.TOC. at tocbase,0
+	.previous
+#endif
+.L.__sigsetjmp:
+        mflr    r0
+        stdu    r1,-64(r1)
+        std     r2,24(r1)
+        std     r3,32(r1)
+        std     r4,40(r1)
+        std     r0,48(r1)
+        // r3 is the original stack pointer.
+        addi    r3,r1,64
+        // r4 is the mangled stack pointer (see glibc)
+        ld      r4,-28696(r13)
+        xor     r4,r3,r4
+        // Materialize a TOC in case we were called from libc.
+        // For big-endian, we load the TOC from the OPD.  For little-
+        // endian, we use the .TOC. symbol to find it.
+        nop
+        bcl     20,31,1f
+1:
+        mflr    r2
+#if _CALL_ELF == 2
+        addis   r2,r2,.TOC.-1b at ha
+        addi    r2,r2,.TOC.-1b at l
+#else
+        addis   r2,r2,_setjmp-1b at ha
+        addi    r2,r2,_setjmp-1b at l
+        ld      r2,8(r2)
+#endif
+        // Call the interceptor.
+        bl      __tsan_setjmp
+        nop
+        // Restore regs needed for __sigsetjmp.
+        ld      r3,32(r1)
+        ld      r4,40(r1)
+        ld      r0,48(r1)
+        // Emulate the real sigsetjmp function.  We do this because we can't
+        // perform a sibcall:  The real sigsetjmp function trashes the TOC
+        // pointer, and with a sibcall we have no way to restore it.
+        // This way we can make sure our caller's stack pointer and
+        // link register are saved correctly in the jmpbuf.
+        ld      r6,-28696(r13)
+        addi    r5,r1,64  // original stack ptr of caller
+        xor     r5,r6,r5
+        std     r5,0(r3)  // mangled stack ptr of caller
+        ld      r5,24(r1)
+        std     r5,8(r3)  // caller's saved TOC pointer
+        xor     r0,r6,r0
+        std     r0,16(r3) // caller's mangled return address
+        mfcr    r0
+        // Nonvolatiles.
+        std     r14,24(r3)
+        stfd    f14,176(r3)
+        stw     r0,172(r3) // CR
+        std     r15,32(r3)
+        stfd    f15,184(r3)
+        std     r16,40(r3)
+        stfd    f16,192(r3)
+        std     r17,48(r3)
+        stfd    f17,200(r3)
+        std     r18,56(r3)
+        stfd    f18,208(r3)
+        std     r19,64(r3)
+        stfd    f19,216(r3)
+        std     r20,72(r3)
+        stfd    f20,224(r3)
+        std     r21,80(r3)
+        stfd    f21,232(r3)
+        std     r22,88(r3)
+        stfd    f22,240(r3)
+        std     r23,96(r3)
+        stfd    f23,248(r3)
+        std     r24,104(r3)
+        stfd    f24,256(r3)
+        std     r25,112(r3)
+        stfd    f25,264(r3)
+        std     r26,120(r3)
+        stfd    f26,272(r3)
+        std     r27,128(r3)
+        stfd    f27,280(r3)
+        std     r28,136(r3)
+        stfd    f28,288(r3)
+        std     r29,144(r3)
+        stfd    f29,296(r3)
+        std     r30,152(r3)
+        stfd    f30,304(r3)
+        std     r31,160(r3)
+        stfd    f31,312(r3)
+        addi    r5,r3,320
+        mfspr   r0,256
+        stw     r0,168(r3) // VRSAVE
+        addi    r6,r5,16
+        stvx    v20,0,r5
+        addi    r5,r5,32
+        stvx    v21,0,r6
+        addi    r6,r6,32
+        stvx    v22,0,r5
+        addi    r5,r5,32
+        stvx    v23,0,r6
+        addi    r6,r6,32
+        stvx    v24,0,r5
+        addi    r5,r5,32
+        stvx    v25,0,r6
+        addi    r6,r6,32
+        stvx    v26,0,r5
+        addi    r5,r5,32
+        stvx    v27,0,r6
+        addi    r6,r6,32
+        stvx    v28,0,r5
+        addi    r5,r5,32
+        stvx    v29,0,r6
+        addi    r6,r6,32
+        stvx    v30,0,r5
+        stvx    v31,0,r6
+        // Save into the "mask-saved" slot.
+        stw     r4,512(r3)
+        // Restore TOC, LR, and stack and return to caller.
+        ld      r2,24(r1)
+        ld      r0,48(r1)
+        addi    r1,r1,64
+        li      r3,0  // This is the sigsetjmp return path
+        mtlr    r0
+        blr
+        .size __sigsetjmp, .-.L.__sigsetjmp
+
+        .globl sigsetjmp
+        .type sigsetjmp, @function
+        .align 4
+sigsetjmp:
+        b       __sigsetjmp
+        .size sigsetjmp, .-sigsetjmp

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_proc.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_proc.cpp
new file mode 100644
index 0000000000000..def61cca14d57
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_proc.cpp
@@ -0,0 +1,60 @@
+//===-- tsan_rtl_proc.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+
+namespace __tsan {
+
+Processor *ProcCreate() {
+  void *mem = InternalAlloc(sizeof(Processor));
+  internal_memset(mem, 0, sizeof(Processor));
+  Processor *proc = new(mem) Processor;
+  proc->thr = nullptr;
+#if !SANITIZER_GO
+  AllocatorProcStart(proc);
+#endif
+  if (common_flags()->detect_deadlocks)
+    proc->dd_pt = ctx->dd->CreatePhysicalThread();
+  return proc;
+}
+
+void ProcDestroy(Processor *proc) {
+  CHECK_EQ(proc->thr, nullptr);
+#if !SANITIZER_GO
+  AllocatorProcFinish(proc);
+#endif
+  ctx->clock_alloc.FlushCache(&proc->clock_cache);
+  ctx->metamap.OnProcIdle(proc);
+  if (common_flags()->detect_deadlocks)
+     ctx->dd->DestroyPhysicalThread(proc->dd_pt);
+  proc->~Processor();
+  InternalFree(proc);
+}
+
+void ProcWire(Processor *proc, ThreadState *thr) {
+  CHECK_EQ(thr->proc1, nullptr);
+  CHECK_EQ(proc->thr, nullptr);
+  thr->proc1 = proc;
+  proc->thr = thr;
+}
+
+void ProcUnwire(Processor *proc, ThreadState *thr) {
+  CHECK_EQ(thr->proc1, proc);
+  CHECK_EQ(proc->thr, thr);
+  thr->proc1 = nullptr;
+  proc->thr = nullptr;
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_report.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_report.cpp
new file mode 100644
index 0000000000000..f332a6a8d1d80
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_report.cpp
@@ -0,0 +1,984 @@
+//===-- tsan_rtl_report.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_stackdepot.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "tsan_platform.h"
+#include "tsan_rtl.h"
+#include "tsan_suppressions.h"
+#include "tsan_symbolize.h"
+#include "tsan_report.h"
+#include "tsan_sync.h"
+#include "tsan_mman.h"
+#include "tsan_flags.h"
+#include "tsan_fd.h"
+
+namespace __tsan {
+
+using namespace __sanitizer;
+
+static ReportStack *SymbolizeStack(StackTrace trace);
+
+// Can be overriden by an application/test to intercept reports.
+#ifdef TSAN_EXTERNAL_HOOKS
+bool OnReport(const ReportDesc *rep, bool suppressed);
+#else
+SANITIZER_WEAK_CXX_DEFAULT_IMPL
+bool OnReport(const ReportDesc *rep, bool suppressed) {
+  (void)rep;
+  return suppressed;
+}
+#endif
+
+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_on_report(const ReportDesc *rep) {
+  (void)rep;
+}
+
+static void StackStripMain(SymbolizedStack *frames) {
+  SymbolizedStack *last_frame = nullptr;
+  SymbolizedStack *last_frame2 = nullptr;
+  for (SymbolizedStack *cur = frames; cur; cur = cur->next) {
+    last_frame2 = last_frame;
+    last_frame = cur;
+  }
+
+  if (last_frame2 == 0)
+    return;
+#if !SANITIZER_GO
+  const char *last = last_frame->info.function;
+  const char *last2 = last_frame2->info.function;
+  // Strip frame above 'main'
+  if (last2 && 0 == internal_strcmp(last2, "main")) {
+    last_frame->ClearAll();
+    last_frame2->next = nullptr;
+  // Strip our internal thread start routine.
+  } else if (last && 0 == internal_strcmp(last, "__tsan_thread_start_func")) {
+    last_frame->ClearAll();
+    last_frame2->next = nullptr;
+    // Strip global ctors init, .preinit_array and main caller.
+  } else if (last && (0 == internal_strcmp(last, "__do_global_ctors_aux") ||
+                      0 == internal_strcmp(last, "__libc_csu_init") ||
+                      0 == internal_strcmp(last, "__libc_start_main"))) {
+    last_frame->ClearAll();
+    last_frame2->next = nullptr;
+  // If both are 0, then we probably just failed to symbolize.
+  } else if (last || last2) {
+    // Ensure that we recovered stack completely. Trimmed stack
+    // can actually happen if we do not instrument some code,
+    // so it's only a debug print. However we must try hard to not miss it
+    // due to our fault.
+    DPrintf("Bottom stack frame is missed\n");
+  }
+#else
+  // The last frame always point into runtime (gosched0, goexit0, runtime.main).
+  last_frame->ClearAll();
+  last_frame2->next = nullptr;
+#endif
+}
+
+ReportStack *SymbolizeStackId(u32 stack_id) {
+  if (stack_id == 0)
+    return 0;
+  StackTrace stack = StackDepotGet(stack_id);
+  if (stack.trace == nullptr)
+    return nullptr;
+  return SymbolizeStack(stack);
+}
+
+static ReportStack *SymbolizeStack(StackTrace trace) {
+  if (trace.size == 0)
+    return 0;
+  SymbolizedStack *top = nullptr;
+  for (uptr si = 0; si < trace.size; si++) {
+    const uptr pc = trace.trace[si];
+    uptr pc1 = pc;
+    // We obtain the return address, but we're interested in the previous
+    // instruction.
+    if ((pc & kExternalPCBit) == 0)
+      pc1 = StackTrace::GetPreviousInstructionPc(pc);
+    SymbolizedStack *ent = SymbolizeCode(pc1);
+    CHECK_NE(ent, 0);
+    SymbolizedStack *last = ent;
+    while (last->next) {
+      last->info.address = pc;  // restore original pc for report
+      last = last->next;
+    }
+    last->info.address = pc;  // restore original pc for report
+    last->next = top;
+    top = ent;
+  }
+  StackStripMain(top);
+
+  auto *stack = New<ReportStack>();
+  stack->frames = top;
+  return stack;
+}
+
+bool ShouldReport(ThreadState *thr, ReportType typ) {
+  // We set thr->suppress_reports in the fork context.
+  // Taking any locking in the fork context can lead to deadlocks.
+  // If any locks are already taken, it's too late to do this check.
+  CheckedMutex::CheckNoLocks();
+  // For the same reason check we didn't lock thread_registry yet.
+  if (SANITIZER_DEBUG)
+    ThreadRegistryLock l(&ctx->thread_registry);
+  if (!flags()->report_bugs || thr->suppress_reports)
+    return false;
+  switch (typ) {
+    case ReportTypeSignalUnsafe:
+      return flags()->report_signal_unsafe;
+    case ReportTypeThreadLeak:
+#if !SANITIZER_GO
+      // It's impossible to join phantom threads
+      // in the child after fork.
+      if (ctx->after_multithreaded_fork)
+        return false;
+#endif
+      return flags()->report_thread_leaks;
+    case ReportTypeMutexDestroyLocked:
+      return flags()->report_destroy_locked;
+    default:
+      return true;
+  }
+}
+
+ScopedReportBase::ScopedReportBase(ReportType typ, uptr tag) {
+  ctx->thread_registry.CheckLocked();
+  rep_ = New<ReportDesc>();
+  rep_->typ = typ;
+  rep_->tag = tag;
+  ctx->report_mtx.Lock();
+}
+
+ScopedReportBase::~ScopedReportBase() {
+  ctx->report_mtx.Unlock();
+  DestroyAndFree(rep_);
+}
+
+void ScopedReportBase::AddStack(StackTrace stack, bool suppressable) {
+  ReportStack **rs = rep_->stacks.PushBack();
+  *rs = SymbolizeStack(stack);
+  (*rs)->suppressable = suppressable;
+}
+
+void ScopedReportBase::AddMemoryAccess(uptr addr, uptr external_tag, Shadow s,
+                                       StackTrace stack, const MutexSet *mset) {
+  auto *mop = New<ReportMop>();
+  rep_->mops.PushBack(mop);
+  mop->tid = s.tid();
+  mop->addr = addr + s.addr0();
+  mop->size = s.size();
+  mop->write = s.IsWrite();
+  mop->atomic = s.IsAtomic();
+  mop->stack = SymbolizeStack(stack);
+  mop->external_tag = external_tag;
+  if (mop->stack)
+    mop->stack->suppressable = true;
+  for (uptr i = 0; i < mset->Size(); i++) {
+    MutexSet::Desc d = mset->Get(i);
+    u64 mid = this->AddMutex(d.id);
+    ReportMopMutex mtx = {mid, d.write};
+    mop->mset.PushBack(mtx);
+  }
+}
+
+void ScopedReportBase::AddUniqueTid(Tid unique_tid) {
+  rep_->unique_tids.PushBack(unique_tid);
+}
+
+void ScopedReportBase::AddThread(const ThreadContext *tctx, bool suppressable) {
+  for (uptr i = 0; i < rep_->threads.Size(); i++) {
+    if ((u32)rep_->threads[i]->id == tctx->tid)
+      return;
+  }
+  auto *rt = New<ReportThread>();
+  rep_->threads.PushBack(rt);
+  rt->id = tctx->tid;
+  rt->os_id = tctx->os_id;
+  rt->running = (tctx->status == ThreadStatusRunning);
+  rt->name = internal_strdup(tctx->name);
+  rt->parent_tid = tctx->parent_tid;
+  rt->thread_type = tctx->thread_type;
+  rt->stack = 0;
+  rt->stack = SymbolizeStackId(tctx->creation_stack_id);
+  if (rt->stack)
+    rt->stack->suppressable = suppressable;
+}
+
+#if !SANITIZER_GO
+static bool FindThreadByUidLockedCallback(ThreadContextBase *tctx, void *arg) {
+  int unique_id = *(int *)arg;
+  return tctx->unique_id == (u32)unique_id;
+}
+
+static ThreadContext *FindThreadByUidLocked(Tid unique_id) {
+  ctx->thread_registry.CheckLocked();
+  return static_cast<ThreadContext *>(
+      ctx->thread_registry.FindThreadContextLocked(
+          FindThreadByUidLockedCallback, &unique_id));
+}
+
+static ThreadContext *FindThreadByTidLocked(Tid tid) {
+  ctx->thread_registry.CheckLocked();
+  return static_cast<ThreadContext *>(
+      ctx->thread_registry.GetThreadLocked(tid));
+}
+
+static bool IsInStackOrTls(ThreadContextBase *tctx_base, void *arg) {
+  uptr addr = (uptr)arg;
+  ThreadContext *tctx = static_cast<ThreadContext*>(tctx_base);
+  if (tctx->status != ThreadStatusRunning)
+    return false;
+  ThreadState *thr = tctx->thr;
+  CHECK(thr);
+  return ((addr >= thr->stk_addr && addr < thr->stk_addr + thr->stk_size) ||
+          (addr >= thr->tls_addr && addr < thr->tls_addr + thr->tls_size));
+}
+
+ThreadContext *IsThreadStackOrTls(uptr addr, bool *is_stack) {
+  ctx->thread_registry.CheckLocked();
+  ThreadContext *tctx =
+      static_cast<ThreadContext *>(ctx->thread_registry.FindThreadContextLocked(
+          IsInStackOrTls, (void *)addr));
+  if (!tctx)
+    return 0;
+  ThreadState *thr = tctx->thr;
+  CHECK(thr);
+  *is_stack = (addr >= thr->stk_addr && addr < thr->stk_addr + thr->stk_size);
+  return tctx;
+}
+#endif
+
+void ScopedReportBase::AddThread(Tid unique_tid, bool suppressable) {
+#if !SANITIZER_GO
+  if (const ThreadContext *tctx = FindThreadByUidLocked(unique_tid))
+    AddThread(tctx, suppressable);
+#endif
+}
+
+void ScopedReportBase::AddMutex(const SyncVar *s) {
+  for (uptr i = 0; i < rep_->mutexes.Size(); i++) {
+    if (rep_->mutexes[i]->id == s->uid)
+      return;
+  }
+  auto *rm = New<ReportMutex>();
+  rep_->mutexes.PushBack(rm);
+  rm->id = s->uid;
+  rm->addr = s->addr;
+  rm->destroyed = false;
+  rm->stack = SymbolizeStackId(s->creation_stack_id);
+}
+
+u64 ScopedReportBase::AddMutex(u64 id) {
+  u64 uid = 0;
+  u64 mid = id;
+  uptr addr = SyncVar::SplitId(id, &uid);
+  SyncVar *s = ctx->metamap.GetSyncIfExists(addr);
+  // Check that the mutex is still alive.
+  // Another mutex can be created at the same address,
+  // so check uid as well.
+  if (s && s->CheckId(uid)) {
+    Lock l(&s->mtx);
+    mid = s->uid;
+    AddMutex(s);
+  } else {
+    AddDeadMutex(id);
+  }
+  return mid;
+}
+
+void ScopedReportBase::AddDeadMutex(u64 id) {
+  for (uptr i = 0; i < rep_->mutexes.Size(); i++) {
+    if (rep_->mutexes[i]->id == id)
+      return;
+  }
+  auto *rm = New<ReportMutex>();
+  rep_->mutexes.PushBack(rm);
+  rm->id = id;
+  rm->addr = 0;
+  rm->destroyed = true;
+  rm->stack = 0;
+}
+
+void ScopedReportBase::AddLocation(uptr addr, uptr size) {
+  if (addr == 0)
+    return;
+#if !SANITIZER_GO
+  int fd = -1;
+  Tid creat_tid = kInvalidTid;
+  StackID creat_stack = 0;
+  if (FdLocation(addr, &fd, &creat_tid, &creat_stack)) {
+    auto *loc = New<ReportLocation>();
+    loc->type = ReportLocationFD;
+    loc->fd = fd;
+    loc->tid = creat_tid;
+    loc->stack = SymbolizeStackId(creat_stack);
+    rep_->locs.PushBack(loc);
+    ThreadContext *tctx = FindThreadByUidLocked(creat_tid);
+    if (tctx)
+      AddThread(tctx);
+    return;
+  }
+  MBlock *b = 0;
+  uptr block_begin = 0;
+  Allocator *a = allocator();
+  if (a->PointerIsMine((void*)addr)) {
+    block_begin = (uptr)a->GetBlockBegin((void *)addr);
+    if (block_begin)
+      b = ctx->metamap.GetBlock(block_begin);
+  }
+  if (!b)
+    b = JavaHeapBlock(addr, &block_begin);
+  if (b != 0) {
+    ThreadContext *tctx = FindThreadByTidLocked(b->tid);
+    auto *loc = New<ReportLocation>();
+    loc->type = ReportLocationHeap;
+    loc->heap_chunk_start = block_begin;
+    loc->heap_chunk_size = b->siz;
+    loc->external_tag = b->tag;
+    loc->tid = tctx ? tctx->tid : b->tid;
+    loc->stack = SymbolizeStackId(b->stk);
+    rep_->locs.PushBack(loc);
+    if (tctx)
+      AddThread(tctx);
+    return;
+  }
+  bool is_stack = false;
+  if (ThreadContext *tctx = IsThreadStackOrTls(addr, &is_stack)) {
+    auto *loc = New<ReportLocation>();
+    loc->type = is_stack ? ReportLocationStack : ReportLocationTLS;
+    loc->tid = tctx->tid;
+    rep_->locs.PushBack(loc);
+    AddThread(tctx);
+  }
+#endif
+  if (ReportLocation *loc = SymbolizeData(addr)) {
+    loc->suppressable = true;
+    rep_->locs.PushBack(loc);
+    return;
+  }
+}
+
+#if !SANITIZER_GO
+void ScopedReportBase::AddSleep(StackID stack_id) {
+  rep_->sleep = SymbolizeStackId(stack_id);
+}
+#endif
+
+void ScopedReportBase::SetCount(int count) { rep_->count = count; }
+
+const ReportDesc *ScopedReportBase::GetReport() const { return rep_; }
+
+ScopedReport::ScopedReport(ReportType typ, uptr tag)
+    : ScopedReportBase(typ, tag) {}
+
+ScopedReport::~ScopedReport() {}
+
+void RestoreStack(Tid tid, const u64 epoch, VarSizeStackTrace *stk,
+                  MutexSet *mset, uptr *tag) {
+  // This function restores stack trace and mutex set for the thread/epoch.
+  // It does so by getting stack trace and mutex set at the beginning of
+  // trace part, and then replaying the trace till the given epoch.
+  Trace* trace = ThreadTrace(tid);
+  ReadLock l(&trace->mtx);
+  const int partidx = (epoch / kTracePartSize) % TraceParts();
+  TraceHeader* hdr = &trace->headers[partidx];
+  if (epoch < hdr->epoch0 || epoch >= hdr->epoch0 + kTracePartSize)
+    return;
+  CHECK_EQ(RoundDown(epoch, kTracePartSize), hdr->epoch0);
+  const u64 epoch0 = RoundDown(epoch, TraceSize());
+  const u64 eend = epoch % TraceSize();
+  const u64 ebegin = RoundDown(eend, kTracePartSize);
+  DPrintf("#%d: RestoreStack epoch=%zu ebegin=%zu eend=%zu partidx=%d\n",
+          tid, (uptr)epoch, (uptr)ebegin, (uptr)eend, partidx);
+  Vector<uptr> stack;
+  stack.Resize(hdr->stack0.size + 64);
+  for (uptr i = 0; i < hdr->stack0.size; i++) {
+    stack[i] = hdr->stack0.trace[i];
+    DPrintf2("  #%02zu: pc=%zx\n", i, stack[i]);
+  }
+  if (mset)
+    *mset = hdr->mset0;
+  uptr pos = hdr->stack0.size;
+  Event *events = (Event*)GetThreadTrace(tid);
+  for (uptr i = ebegin; i <= eend; i++) {
+    Event ev = events[i];
+    EventType typ = (EventType)(ev >> kEventPCBits);
+    uptr pc = (uptr)(ev & ((1ull << kEventPCBits) - 1));
+    DPrintf2("  %zu typ=%d pc=%zx\n", i, typ, pc);
+    if (typ == EventTypeMop) {
+      stack[pos] = pc;
+    } else if (typ == EventTypeFuncEnter) {
+      if (stack.Size() < pos + 2)
+        stack.Resize(pos + 2);
+      stack[pos++] = pc;
+    } else if (typ == EventTypeFuncExit) {
+      if (pos > 0)
+        pos--;
+    }
+    if (mset) {
+      if (typ == EventTypeLock) {
+        mset->Add(pc, true, epoch0 + i);
+      } else if (typ == EventTypeUnlock) {
+        mset->Del(pc, true);
+      } else if (typ == EventTypeRLock) {
+        mset->Add(pc, false, epoch0 + i);
+      } else if (typ == EventTypeRUnlock) {
+        mset->Del(pc, false);
+      }
+    }
+    for (uptr j = 0; j <= pos; j++)
+      DPrintf2("      #%zu: %zx\n", j, stack[j]);
+  }
+  if (pos == 0 && stack[0] == 0)
+    return;
+  pos++;
+  stk->Init(&stack[0], pos);
+  ExtractTagFromStack(stk, tag);
+}
+
+namespace v3 {
+
+// Replays the trace up to last_pos position in the last part
+// or up to the provided epoch/sid (whichever is earlier)
+// and calls the provided function f for each event.
+template <typename Func>
+void TraceReplay(Trace *trace, TracePart *last, Event *last_pos, Sid sid,
+                 Epoch epoch, Func f) {
+  TracePart *part = trace->parts.Front();
+  Sid ev_sid = kFreeSid;
+  Epoch ev_epoch = kEpochOver;
+  for (;;) {
+    DCHECK_EQ(part->trace, trace);
+    // Note: an event can't start in the last element.
+    // Since an event can take up to 2 elements,
+    // we ensure we have at least 2 before adding an event.
+    Event *end = &part->events[TracePart::kSize - 1];
+    if (part == last)
+      end = last_pos;
+    for (Event *evp = &part->events[0]; evp < end; evp++) {
+      Event *evp0 = evp;
+      if (!evp->is_access && !evp->is_func) {
+        switch (evp->type) {
+          case EventType::kTime: {
+            auto *ev = reinterpret_cast<EventTime *>(evp);
+            ev_sid = static_cast<Sid>(ev->sid);
+            ev_epoch = static_cast<Epoch>(ev->epoch);
+            if (ev_sid == sid && ev_epoch > epoch)
+              return;
+            break;
+          }
+          case EventType::kAccessExt:
+            FALLTHROUGH;
+          case EventType::kAccessRange:
+            FALLTHROUGH;
+          case EventType::kLock:
+            FALLTHROUGH;
+          case EventType::kRLock:
+            // These take 2 Event elements.
+            evp++;
+            break;
+          case EventType::kUnlock:
+            // This takes 1 Event element.
+            break;
+        }
+      }
+      CHECK_NE(ev_sid, kFreeSid);
+      CHECK_NE(ev_epoch, kEpochOver);
+      f(ev_sid, ev_epoch, evp0);
+    }
+    if (part == last)
+      return;
+    part = trace->parts.Next(part);
+    CHECK(part);
+  }
+  CHECK(0);
+}
+
+static void RestoreStackMatch(VarSizeStackTrace *pstk, MutexSet *pmset,
+                              Vector<uptr> *stack, MutexSet *mset, uptr pc,
+                              bool *found) {
+  DPrintf2("    MATCHED\n");
+  *pmset = *mset;
+  stack->PushBack(pc);
+  pstk->Init(&(*stack)[0], stack->Size());
+  stack->PopBack();
+  *found = true;
+}
+
+// Checks if addr1|size1 is fully contained in addr2|size2.
+// We check for fully contained instread of just overlapping
+// because a memory access is always traced once, but can be
+// split into multiple accesses in the shadow.
+static constexpr bool IsWithinAccess(uptr addr1, uptr size1, uptr addr2,
+                                     uptr size2) {
+  return addr1 >= addr2 && addr1 + size1 <= addr2 + size2;
+}
+
+// Replays the trace of thread tid up to the target event identified
+// by sid/epoch/addr/size/typ and restores and returns stack, mutex set
+// and tag for that event. If there are multiple such events, it returns
+// the last one. Returns false if the event is not present in the trace.
+bool RestoreStack(Tid tid, EventType type, Sid sid, Epoch epoch, uptr addr,
+                  uptr size, AccessType typ, VarSizeStackTrace *pstk,
+                  MutexSet *pmset, uptr *ptag) {
+  // This function restores stack trace and mutex set for the thread/epoch.
+  // It does so by getting stack trace and mutex set at the beginning of
+  // trace part, and then replaying the trace till the given epoch.
+  DPrintf2("RestoreStack: tid=%u sid=%u@%u addr=0x%zx/%zu typ=%x\n", tid,
+           static_cast<int>(sid), static_cast<int>(epoch), addr, size,
+           static_cast<int>(typ));
+  ctx->slot_mtx.CheckLocked();  // needed to prevent trace part recycling
+  ctx->thread_registry.CheckLocked();
+  ThreadContext *tctx =
+      static_cast<ThreadContext *>(ctx->thread_registry.GetThreadLocked(tid));
+  Trace *trace = &tctx->trace;
+  // Snapshot first/last parts and the current position in the last part.
+  TracePart *first_part;
+  TracePart *last_part;
+  Event *last_pos;
+  {
+    Lock lock(&trace->mtx);
+    first_part = trace->parts.Front();
+    if (!first_part)
+      return false;
+    last_part = trace->parts.Back();
+    last_pos = trace->final_pos;
+    if (tctx->thr)
+      last_pos = (Event *)atomic_load_relaxed(&tctx->thr->trace_pos);
+  }
+  DynamicMutexSet mset;
+  Vector<uptr> stack;
+  uptr prev_pc = 0;
+  bool found = false;
+  bool is_read = typ & kAccessRead;
+  bool is_atomic = typ & kAccessAtomic;
+  bool is_free = typ & kAccessFree;
+  TraceReplay(
+      trace, last_part, last_pos, sid, epoch,
+      [&](Sid ev_sid, Epoch ev_epoch, Event *evp) {
+        bool match = ev_sid == sid && ev_epoch == epoch;
+        if (evp->is_access) {
+          if (evp->is_func == 0 && evp->type == EventType::kAccessExt &&
+              evp->_ == 0)  // NopEvent
+            return;
+          auto *ev = reinterpret_cast<EventAccess *>(evp);
+          uptr ev_addr = RestoreAddr(ev->addr);
+          uptr ev_size = 1 << ev->size_log;
+          uptr ev_pc =
+              prev_pc + ev->pc_delta - (1 << (EventAccess::kPCBits - 1));
+          prev_pc = ev_pc;
+          DPrintf2("  Access: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
+                   ev_addr, ev_size, ev->is_read, ev->is_atomic);
+          if (match && type == EventType::kAccessExt &&
+              IsWithinAccess(addr, size, ev_addr, ev_size) &&
+              is_read == ev->is_read && is_atomic == ev->is_atomic && !is_free)
+            RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+          return;
+        }
+        if (evp->is_func) {
+          auto *ev = reinterpret_cast<EventFunc *>(evp);
+          if (ev->pc) {
+            DPrintf2("  FuncEnter: pc=0x%llx\n", ev->pc);
+            stack.PushBack(ev->pc);
+          } else {
+            DPrintf2("  FuncExit\n");
+            CHECK(stack.Size());
+            stack.PopBack();
+          }
+          return;
+        }
+        switch (evp->type) {
+          case EventType::kAccessExt: {
+            auto *ev = reinterpret_cast<EventAccessExt *>(evp);
+            uptr ev_addr = RestoreAddr(ev->addr);
+            uptr ev_size = 1 << ev->size_log;
+            prev_pc = ev->pc;
+            DPrintf2("  AccessExt: pc=0x%llx addr=0x%zx/%zu type=%u/%u\n",
+                     ev->pc, ev_addr, ev_size, ev->is_read, ev->is_atomic);
+            if (match && type == EventType::kAccessExt &&
+                IsWithinAccess(addr, size, ev_addr, ev_size) &&
+                is_read == ev->is_read && is_atomic == ev->is_atomic &&
+                !is_free)
+              RestoreStackMatch(pstk, pmset, &stack, mset, ev->pc, &found);
+            break;
+          }
+          case EventType::kAccessRange: {
+            auto *ev = reinterpret_cast<EventAccessRange *>(evp);
+            uptr ev_addr = RestoreAddr(ev->addr);
+            uptr ev_size =
+                (ev->size_hi << EventAccessRange::kSizeLoBits) + ev->size_lo;
+            uptr ev_pc = RestoreAddr(ev->pc);
+            prev_pc = ev_pc;
+            DPrintf2("  Range: pc=0x%zx addr=0x%zx/%zu type=%u/%u\n", ev_pc,
+                     ev_addr, ev_size, ev->is_read, ev->is_free);
+            if (match && type == EventType::kAccessExt &&
+                IsWithinAccess(addr, size, ev_addr, ev_size) &&
+                is_read == ev->is_read && !is_atomic && is_free == ev->is_free)
+              RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+            break;
+          }
+          case EventType::kLock:
+            FALLTHROUGH;
+          case EventType::kRLock: {
+            auto *ev = reinterpret_cast<EventLock *>(evp);
+            bool is_write = ev->type == EventType::kLock;
+            uptr ev_addr = RestoreAddr(ev->addr);
+            uptr ev_pc = RestoreAddr(ev->pc);
+            StackID stack_id =
+                (ev->stack_hi << EventLock::kStackIDLoBits) + ev->stack_lo;
+            DPrintf2("  Lock: pc=0x%zx addr=0x%zx stack=%u write=%d\n", ev_pc,
+                     ev_addr, stack_id, is_write);
+            mset->AddAddr(ev_addr, stack_id, is_write);
+            // Events with ev_pc == 0 are written to the beginning of trace
+            // part as initial mutex set (are not real).
+            if (match && type == EventType::kLock && addr == ev_addr && ev_pc)
+              RestoreStackMatch(pstk, pmset, &stack, mset, ev_pc, &found);
+            break;
+          }
+          case EventType::kUnlock: {
+            auto *ev = reinterpret_cast<EventUnlock *>(evp);
+            uptr ev_addr = RestoreAddr(ev->addr);
+            DPrintf2("  Unlock: addr=0x%zx\n", ev_addr);
+            mset->DelAddr(ev_addr);
+            break;
+          }
+          case EventType::kTime:
+            // TraceReplay already extracted sid/epoch from it,
+            // nothing else to do here.
+            break;
+        }
+      });
+  ExtractTagFromStack(pstk, ptag);
+  return found;
+}
+
+}  // namespace v3
+
+bool RacyStacks::operator==(const RacyStacks &other) const {
+  if (hash[0] == other.hash[0] && hash[1] == other.hash[1])
+    return true;
+  if (hash[0] == other.hash[1] && hash[1] == other.hash[0])
+    return true;
+  return false;
+}
+
+static bool FindRacyStacks(const RacyStacks &hash) {
+  for (uptr i = 0; i < ctx->racy_stacks.Size(); i++) {
+    if (hash == ctx->racy_stacks[i]) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (stack)\n");
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool HandleRacyStacks(ThreadState *thr, VarSizeStackTrace traces[2]) {
+  if (!flags()->suppress_equal_stacks)
+    return false;
+  RacyStacks hash;
+  hash.hash[0] = md5_hash(traces[0].trace, traces[0].size * sizeof(uptr));
+  hash.hash[1] = md5_hash(traces[1].trace, traces[1].size * sizeof(uptr));
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyStacks(hash))
+      return true;
+  }
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyStacks(hash))
+    return true;
+  ctx->racy_stacks.PushBack(hash);
+  return false;
+}
+
+static bool FindRacyAddress(const RacyAddress &ra0) {
+  for (uptr i = 0; i < ctx->racy_addresses.Size(); i++) {
+    RacyAddress ra2 = ctx->racy_addresses[i];
+    uptr maxbeg = max(ra0.addr_min, ra2.addr_min);
+    uptr minend = min(ra0.addr_max, ra2.addr_max);
+    if (maxbeg < minend) {
+      VPrintf(2, "ThreadSanitizer: suppressing report as doubled (addr)\n");
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool HandleRacyAddress(ThreadState *thr, uptr addr_min, uptr addr_max) {
+  if (!flags()->suppress_equal_addresses)
+    return false;
+  RacyAddress ra0 = {addr_min, addr_max};
+  {
+    ReadLock lock(&ctx->racy_mtx);
+    if (FindRacyAddress(ra0))
+      return true;
+  }
+  Lock lock(&ctx->racy_mtx);
+  if (FindRacyAddress(ra0))
+    return true;
+  ctx->racy_addresses.PushBack(ra0);
+  return false;
+}
+
+bool OutputReport(ThreadState *thr, const ScopedReport &srep) {
+  // These should have been checked in ShouldReport.
+  // It's too late to check them here, we have already taken locks.
+  CHECK(flags()->report_bugs);
+  CHECK(!thr->suppress_reports);
+  atomic_store_relaxed(&ctx->last_symbolize_time_ns, NanoTime());
+  const ReportDesc *rep = srep.GetReport();
+  CHECK_EQ(thr->current_report, nullptr);
+  thr->current_report = rep;
+  Suppression *supp = 0;
+  uptr pc_or_addr = 0;
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->mops.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->mops[i]->stack, &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->stacks.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->stacks[i], &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->threads.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->threads[i]->stack, &supp);
+  for (uptr i = 0; pc_or_addr == 0 && i < rep->locs.Size(); i++)
+    pc_or_addr = IsSuppressed(rep->typ, rep->locs[i], &supp);
+  if (pc_or_addr != 0) {
+    Lock lock(&ctx->fired_suppressions_mtx);
+    FiredSuppression s = {srep.GetReport()->typ, pc_or_addr, supp};
+    ctx->fired_suppressions.push_back(s);
+  }
+  {
+    bool old_is_freeing = thr->is_freeing;
+    thr->is_freeing = false;
+    bool suppressed = OnReport(rep, pc_or_addr != 0);
+    thr->is_freeing = old_is_freeing;
+    if (suppressed) {
+      thr->current_report = nullptr;
+      return false;
+    }
+  }
+  PrintReport(rep);
+  __tsan_on_report(rep);
+  ctx->nreported++;
+  if (flags()->halt_on_error)
+    Die();
+  thr->current_report = nullptr;
+  return true;
+}
+
+bool IsFiredSuppression(Context *ctx, ReportType type, StackTrace trace) {
+  ReadLock lock(&ctx->fired_suppressions_mtx);
+  for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
+    if (ctx->fired_suppressions[k].type != type)
+      continue;
+    for (uptr j = 0; j < trace.size; j++) {
+      FiredSuppression *s = &ctx->fired_suppressions[k];
+      if (trace.trace[j] == s->pc_or_addr) {
+        if (s->supp)
+          atomic_fetch_add(&s->supp->hit_count, 1, memory_order_relaxed);
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
+static bool IsFiredSuppression(Context *ctx, ReportType type, uptr addr) {
+  ReadLock lock(&ctx->fired_suppressions_mtx);
+  for (uptr k = 0; k < ctx->fired_suppressions.size(); k++) {
+    if (ctx->fired_suppressions[k].type != type)
+      continue;
+    FiredSuppression *s = &ctx->fired_suppressions[k];
+    if (addr == s->pc_or_addr) {
+      if (s->supp)
+        atomic_fetch_add(&s->supp->hit_count, 1, memory_order_relaxed);
+      return true;
+    }
+  }
+  return false;
+}
+
+static bool RaceBetweenAtomicAndFree(ThreadState *thr) {
+  Shadow s0(thr->racy_state[0]);
+  Shadow s1(thr->racy_state[1]);
+  CHECK(!(s0.IsAtomic() && s1.IsAtomic()));
+  if (!s0.IsAtomic() && !s1.IsAtomic())
+    return true;
+  if (s0.IsAtomic() && s1.IsFreed())
+    return true;
+  if (s1.IsAtomic() && thr->is_freeing)
+    return true;
+  return false;
+}
+
+void ReportRace(ThreadState *thr) {
+  CheckedMutex::CheckNoLocks();
+
+  // Symbolizer makes lots of intercepted calls. If we try to process them,
+  // at best it will cause deadlocks on internal mutexes.
+  ScopedIgnoreInterceptors ignore;
+
+  if (!ShouldReport(thr, ReportTypeRace))
+    return;
+  if (!flags()->report_atomic_races && !RaceBetweenAtomicAndFree(thr))
+    return;
+
+  bool freed = false;
+  {
+    Shadow s(thr->racy_state[1]);
+    freed = s.GetFreedAndReset();
+    thr->racy_state[1] = s.raw();
+  }
+
+  uptr addr = ShadowToMem(thr->racy_shadow_addr);
+  uptr addr_min = 0;
+  uptr addr_max = 0;
+  {
+    uptr a0 = addr + Shadow(thr->racy_state[0]).addr0();
+    uptr a1 = addr + Shadow(thr->racy_state[1]).addr0();
+    uptr e0 = a0 + Shadow(thr->racy_state[0]).size();
+    uptr e1 = a1 + Shadow(thr->racy_state[1]).size();
+    addr_min = min(a0, a1);
+    addr_max = max(e0, e1);
+    if (IsExpectedReport(addr_min, addr_max - addr_min))
+      return;
+  }
+  if (HandleRacyAddress(thr, addr_min, addr_max))
+    return;
+
+  ReportType typ = ReportTypeRace;
+  if (thr->is_vptr_access && freed)
+    typ = ReportTypeVptrUseAfterFree;
+  else if (thr->is_vptr_access)
+    typ = ReportTypeVptrRace;
+  else if (freed)
+    typ = ReportTypeUseAfterFree;
+
+  if (IsFiredSuppression(ctx, typ, addr))
+    return;
+
+  const uptr kMop = 2;
+  VarSizeStackTrace traces[kMop];
+  uptr tags[kMop] = {kExternalTagNone};
+  uptr toppc = TraceTopPC(thr);
+  if (toppc >> kEventPCBits) {
+    // This is a work-around for a known issue.
+    // The scenario where this happens is rather elaborate and requires
+    // an instrumented __sanitizer_report_error_summary callback and
+    // a __tsan_symbolize_external callback and a race during a range memory
+    // access larger than 8 bytes. MemoryAccessRange adds the current PC to
+    // the trace and starts processing memory accesses. A first memory access
+    // triggers a race, we report it and call the instrumented
+    // __sanitizer_report_error_summary, which adds more stuff to the trace
+    // since it is intrumented. Then a second memory access in MemoryAccessRange
+    // also triggers a race and we get here and call TraceTopPC to get the
+    // current PC, however now it contains some unrelated events from the
+    // callback. Most likely, TraceTopPC will now return a EventTypeFuncExit
+    // event. Later we subtract -1 from it (in GetPreviousInstructionPc)
+    // and the resulting PC has kExternalPCBit set, so we pass it to
+    // __tsan_symbolize_external_ex. __tsan_symbolize_external_ex is within its
+    // rights to crash since the PC is completely bogus.
+    // test/tsan/double_race.cpp contains a test case for this.
+    toppc = 0;
+  }
+  ObtainCurrentStack(thr, toppc, &traces[0], &tags[0]);
+  if (IsFiredSuppression(ctx, typ, traces[0]))
+    return;
+
+  DynamicMutexSet mset2;
+  Shadow s2(thr->racy_state[1]);
+  RestoreStack(s2.tid(), s2.epoch(), &traces[1], mset2, &tags[1]);
+  if (IsFiredSuppression(ctx, typ, traces[1]))
+    return;
+
+  if (HandleRacyStacks(thr, traces))
+    return;
+
+  // If any of the accesses has a tag, treat this as an "external" race.
+  uptr tag = kExternalTagNone;
+  for (uptr i = 0; i < kMop; i++) {
+    if (tags[i] != kExternalTagNone) {
+      typ = ReportTypeExternalRace;
+      tag = tags[i];
+      break;
+    }
+  }
+
+  ThreadRegistryLock l0(&ctx->thread_registry);
+  ScopedReport rep(typ, tag);
+  for (uptr i = 0; i < kMop; i++) {
+    Shadow s(thr->racy_state[i]);
+    rep.AddMemoryAccess(addr, tags[i], s, traces[i],
+                        i == 0 ? &thr->mset : mset2);
+  }
+
+  for (uptr i = 0; i < kMop; i++) {
+    FastState s(thr->racy_state[i]);
+    ThreadContext *tctx = static_cast<ThreadContext *>(
+        ctx->thread_registry.GetThreadLocked(s.tid()));
+    if (s.epoch() < tctx->epoch0 || s.epoch() > tctx->epoch1)
+      continue;
+    rep.AddThread(tctx);
+  }
+
+  rep.AddLocation(addr_min, addr_max - addr_min);
+
+#if !SANITIZER_GO
+  {
+    Shadow s(thr->racy_state[1]);
+    if (s.epoch() <= thr->last_sleep_clock.get(s.tid()))
+      rep.AddSleep(thr->last_sleep_stack_id);
+  }
+#endif
+
+  OutputReport(thr, rep);
+}
+
+void PrintCurrentStack(ThreadState *thr, uptr pc) {
+  VarSizeStackTrace trace;
+  ObtainCurrentStack(thr, pc, &trace);
+  PrintStack(SymbolizeStack(trace));
+}
+
+// Always inlining PrintCurrentStackSlow, because LocatePcInTrace assumes
+// __sanitizer_print_stack_trace exists in the actual unwinded stack, but
+// tail-call to PrintCurrentStackSlow breaks this assumption because
+// __sanitizer_print_stack_trace disappears after tail-call.
+// However, this solution is not reliable enough, please see dvyukov's comment
+// http://reviews.llvm.org/D19148#406208
+// Also see PR27280 comment 2 and 3 for breaking examples and analysis.
+ALWAYS_INLINE USED void PrintCurrentStackSlow(uptr pc) {
+#if !SANITIZER_GO
+  uptr bp = GET_CURRENT_FRAME();
+  auto *ptrace = New<BufferedStackTrace>();
+  ptrace->Unwind(pc, bp, nullptr, false);
+
+  for (uptr i = 0; i < ptrace->size / 2; i++) {
+    uptr tmp = ptrace->trace_buffer[i];
+    ptrace->trace_buffer[i] = ptrace->trace_buffer[ptrace->size - i - 1];
+    ptrace->trace_buffer[ptrace->size - i - 1] = tmp;
+  }
+  PrintStack(SymbolizeStack(*ptrace));
+#endif
+}
+
+}  // namespace __tsan
+
+using namespace __tsan;
+
+extern "C" {
+SANITIZER_INTERFACE_ATTRIBUTE
+void __sanitizer_print_stack_trace() {
+  PrintCurrentStackSlow(StackTrace::GetCurrentPc());
+}
+}  // extern "C"

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_s390x.S b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_s390x.S
new file mode 100644
index 0000000000000..fcff35fbc7e07
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_s390x.S
@@ -0,0 +1,47 @@
+#include "sanitizer_common/sanitizer_asm.h"
+
+#define CFA_OFFSET 160
+#define R2_REL_OFFSET 16
+#define R3_REL_OFFSET 24
+#define R14_REL_OFFSET 112
+#define R15_REL_OFFSET 120
+#define FRAME_SIZE 160
+
+.text
+
+ASM_HIDDEN(__tsan_setjmp)
+
+.macro intercept symbol, real
+.comm \real, 8, 8
+.globl ASM_SYMBOL_INTERCEPTOR(\symbol)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(\symbol))
+ASM_SYMBOL_INTERCEPTOR(\symbol):
+  CFI_STARTPROC
+  stmg %r2, %r3, R2_REL_OFFSET(%r15)
+  CFI_REL_OFFSET(%r2, R2_REL_OFFSET)
+  CFI_REL_OFFSET(%r3, R3_REL_OFFSET)
+  stmg %r14, %r15, R14_REL_OFFSET(%r15)
+  CFI_REL_OFFSET(%r14, R14_REL_OFFSET)
+  CFI_REL_OFFSET(%r15, R15_REL_OFFSET)
+  aghi %r15, -FRAME_SIZE
+  CFI_ADJUST_CFA_OFFSET(FRAME_SIZE)
+  la %r2, FRAME_SIZE(%r15)
+  brasl %r14, ASM_SYMBOL(__tsan_setjmp)
+  lmg %r14, %r15, FRAME_SIZE + R14_REL_OFFSET(%r15)
+  CFI_RESTORE(%r14)
+  CFI_RESTORE(%r15)
+  CFI_DEF_CFA_OFFSET(CFA_OFFSET)
+  lmg %r2, %r3, R2_REL_OFFSET(%r15)
+  CFI_RESTORE(%r2)
+  CFI_RESTORE(%r3)
+  larl %r1, \real
+  lg %r1, 0(%r1)
+  br %r1
+  CFI_ENDPROC
+  ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(\symbol))
+.endm
+
+intercept setjmp, _ZN14__interception11real_setjmpE
+intercept _setjmp, _ZN14__interception12real__setjmpE
+intercept sigsetjmp, _ZN14__interception14real_sigsetjmpE
+intercept __sigsetjmp, _ZN14__interception16real___sigsetjmpE

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_rtl_thread.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_thread.cpp
new file mode 100644
index 0000000000000..c8f7124c009d6
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_rtl_thread.cpp
@@ -0,0 +1,349 @@
+//===-- tsan_rtl_thread.cpp -----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+#include "tsan_platform.h"
+#include "tsan_report.h"
+#include "tsan_sync.h"
+
+namespace __tsan {
+
+// ThreadContext implementation.
+
+ThreadContext::ThreadContext(Tid tid)
+    : ThreadContextBase(tid), thr(), sync(), epoch0(), epoch1() {}
+
+#if !SANITIZER_GO
+ThreadContext::~ThreadContext() {
+}
+#endif
+
+void ThreadContext::OnReset() {
+  CHECK_EQ(sync.size(), 0);
+  uptr trace_p = GetThreadTrace(tid);
+  ReleaseMemoryPagesToOS(trace_p, trace_p + TraceSize() * sizeof(Event));
+  //!!! ReleaseMemoryToOS(GetThreadTraceHeader(tid), sizeof(Trace));
+}
+
+#if !SANITIZER_GO
+struct ThreadLeak {
+  ThreadContext *tctx;
+  int count;
+};
+
+static void CollectThreadLeaks(ThreadContextBase *tctx_base, void *arg) {
+  auto &leaks = *static_cast<Vector<ThreadLeak> *>(arg);
+  auto *tctx = static_cast<ThreadContext *>(tctx_base);
+  if (tctx->detached || tctx->status != ThreadStatusFinished)
+    return;
+  for (uptr i = 0; i < leaks.Size(); i++) {
+    if (leaks[i].tctx->creation_stack_id == tctx->creation_stack_id) {
+      leaks[i].count++;
+      return;
+    }
+  }
+  leaks.PushBack({tctx, 1});
+}
+#endif
+
+#if !SANITIZER_GO
+static void ReportIgnoresEnabled(ThreadContext *tctx, IgnoreSet *set) {
+  if (tctx->tid == kMainTid) {
+    Printf("ThreadSanitizer: main thread finished with ignores enabled\n");
+  } else {
+    Printf("ThreadSanitizer: thread T%d %s finished with ignores enabled,"
+      " created at:\n", tctx->tid, tctx->name);
+    PrintStack(SymbolizeStackId(tctx->creation_stack_id));
+  }
+  Printf("  One of the following ignores was not ended"
+      " (in order of probability)\n");
+  for (uptr i = 0; i < set->Size(); i++) {
+    Printf("  Ignore was enabled at:\n");
+    PrintStack(SymbolizeStackId(set->At(i)));
+  }
+  Die();
+}
+
+static void ThreadCheckIgnore(ThreadState *thr) {
+  if (ctx->after_multithreaded_fork)
+    return;
+  if (thr->ignore_reads_and_writes)
+    ReportIgnoresEnabled(thr->tctx, &thr->mop_ignore_set);
+  if (thr->ignore_sync)
+    ReportIgnoresEnabled(thr->tctx, &thr->sync_ignore_set);
+}
+#else
+static void ThreadCheckIgnore(ThreadState *thr) {}
+#endif
+
+void ThreadFinalize(ThreadState *thr) {
+  ThreadCheckIgnore(thr);
+#if !SANITIZER_GO
+  if (!ShouldReport(thr, ReportTypeThreadLeak))
+    return;
+  ThreadRegistryLock l(&ctx->thread_registry);
+  Vector<ThreadLeak> leaks;
+  ctx->thread_registry.RunCallbackForEachThreadLocked(CollectThreadLeaks,
+                                                      &leaks);
+  for (uptr i = 0; i < leaks.Size(); i++) {
+    ScopedReport rep(ReportTypeThreadLeak);
+    rep.AddThread(leaks[i].tctx, true);
+    rep.SetCount(leaks[i].count);
+    OutputReport(thr, rep);
+  }
+#endif
+}
+
+int ThreadCount(ThreadState *thr) {
+  uptr result;
+  ctx->thread_registry.GetNumberOfThreads(0, 0, &result);
+  return (int)result;
+}
+
+struct OnCreatedArgs {
+  ThreadState *thr;
+  uptr pc;
+};
+
+Tid ThreadCreate(ThreadState *thr, uptr pc, uptr uid, bool detached) {
+  OnCreatedArgs args = { thr, pc };
+  u32 parent_tid = thr ? thr->tid : kInvalidTid;  // No parent for GCD workers.
+  Tid tid = ctx->thread_registry.CreateThread(uid, detached, parent_tid, &args);
+  DPrintf("#%d: ThreadCreate tid=%d uid=%zu\n", parent_tid, tid, uid);
+  return tid;
+}
+
+void ThreadContext::OnCreated(void *arg) {
+  thr = 0;
+  if (tid == kMainTid)
+    return;
+  OnCreatedArgs *args = static_cast<OnCreatedArgs *>(arg);
+  if (!args->thr)  // GCD workers don't have a parent thread.
+    return;
+  args->thr->fast_state.IncrementEpoch();
+  // Can't increment epoch w/o writing to the trace as well.
+  TraceAddEvent(args->thr, args->thr->fast_state, EventTypeMop, 0);
+  ReleaseImpl(args->thr, 0, &sync);
+  creation_stack_id = CurrentStackId(args->thr, args->pc);
+}
+
+extern "C" void __tsan_stack_initialization() {}
+
+struct OnStartedArgs {
+  ThreadState *thr;
+  uptr stk_addr;
+  uptr stk_size;
+  uptr tls_addr;
+  uptr tls_size;
+};
+
+void ThreadStart(ThreadState *thr, Tid tid, tid_t os_id,
+                 ThreadType thread_type) {
+  uptr stk_addr = 0;
+  uptr stk_size = 0;
+  uptr tls_addr = 0;
+  uptr tls_size = 0;
+#if !SANITIZER_GO
+  if (thread_type != ThreadType::Fiber)
+    GetThreadStackAndTls(tid == kMainTid, &stk_addr, &stk_size, &tls_addr,
+                         &tls_size);
+#endif
+
+  ThreadRegistry *tr = &ctx->thread_registry;
+  OnStartedArgs args = { thr, stk_addr, stk_size, tls_addr, tls_size };
+  tr->StartThread(tid, os_id, thread_type, &args);
+
+  while (!thr->tctx->trace.parts.Empty()) thr->tctx->trace.parts.PopBack();
+
+#if !SANITIZER_GO
+  if (ctx->after_multithreaded_fork) {
+    thr->ignore_interceptors++;
+    ThreadIgnoreBegin(thr, 0);
+    ThreadIgnoreSyncBegin(thr, 0);
+  }
+#endif
+
+#if !SANITIZER_GO
+  // Don't imitate stack/TLS writes for the main thread,
+  // because its initialization is synchronized with all
+  // subsequent threads anyway.
+  if (tid != kMainTid) {
+    if (stk_addr && stk_size) {
+      const uptr pc = StackTrace::GetNextInstructionPc(
+          reinterpret_cast<uptr>(__tsan_stack_initialization));
+      MemoryRangeImitateWrite(thr, pc, stk_addr, stk_size);
+    }
+
+    if (tls_addr && tls_size)
+      ImitateTlsWrite(thr, tls_addr, tls_size);
+  }
+#endif
+}
+
+void ThreadContext::OnStarted(void *arg) {
+  OnStartedArgs *args = static_cast<OnStartedArgs *>(arg);
+  thr = args->thr;
+  // RoundUp so that one trace part does not contain events
+  // from 
diff erent threads.
+  epoch0 = RoundUp(epoch1 + 1, kTracePartSize);
+  epoch1 = (u64)-1;
+  new (thr)
+      ThreadState(ctx, tid, unique_id, epoch0, reuse_count, args->stk_addr,
+                  args->stk_size, args->tls_addr, args->tls_size);
+  if (common_flags()->detect_deadlocks)
+    thr->dd_lt = ctx->dd->CreateLogicalThread(unique_id);
+  thr->fast_state.SetHistorySize(flags()->history_size);
+  // Commit switch to the new part of the trace.
+  // TraceAddEvent will reset stack0/mset0 in the new part for us.
+  TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+
+  thr->fast_synch_epoch = epoch0;
+  AcquireImpl(thr, 0, &sync);
+  sync.Reset(&thr->proc()->clock_cache);
+  thr->tctx = this;
+  thr->is_inited = true;
+  DPrintf(
+      "#%d: ThreadStart epoch=%zu stk_addr=%zx stk_size=%zx "
+      "tls_addr=%zx tls_size=%zx\n",
+      tid, (uptr)epoch0, args->stk_addr, args->stk_size, args->tls_addr,
+      args->tls_size);
+}
+
+void ThreadFinish(ThreadState *thr) {
+  ThreadCheckIgnore(thr);
+  if (thr->stk_addr && thr->stk_size)
+    DontNeedShadowFor(thr->stk_addr, thr->stk_size);
+  if (thr->tls_addr && thr->tls_size)
+    DontNeedShadowFor(thr->tls_addr, thr->tls_size);
+  thr->is_dead = true;
+  thr->is_inited = false;
+#if !SANITIZER_GO
+  thr->ignore_interceptors++;
+#endif
+  ctx->thread_registry.FinishThread(thr->tid);
+}
+
+void ThreadContext::OnFinished() {
+  if (!detached) {
+    thr->fast_state.IncrementEpoch();
+    // Can't increment epoch w/o writing to the trace as well.
+    TraceAddEvent(thr, thr->fast_state, EventTypeMop, 0);
+    ReleaseImpl(thr, 0, &sync);
+  }
+  epoch1 = thr->fast_state.epoch();
+
+#if !SANITIZER_GO
+  UnmapOrDie(thr->shadow_stack, kShadowStackSize * sizeof(uptr));
+#else
+  Free(thr->shadow_stack);
+#endif
+  thr->shadow_stack = nullptr;
+  thr->shadow_stack_pos = nullptr;
+  thr->shadow_stack_end = nullptr;
+
+  if (common_flags()->detect_deadlocks)
+    ctx->dd->DestroyLogicalThread(thr->dd_lt);
+  thr->clock.ResetCached(&thr->proc()->clock_cache);
+#if !SANITIZER_GO
+  thr->last_sleep_clock.ResetCached(&thr->proc()->clock_cache);
+#endif
+#if !SANITIZER_GO
+  PlatformCleanUpThreadState(thr);
+#endif
+  thr->~ThreadState();
+  thr = 0;
+}
+
+struct ConsumeThreadContext {
+  uptr uid;
+  ThreadContextBase *tctx;
+};
+
+Tid ThreadConsumeTid(ThreadState *thr, uptr pc, uptr uid) {
+  return ctx->thread_registry.ConsumeThreadUserId(uid);
+}
+
+void ThreadJoin(ThreadState *thr, uptr pc, Tid tid) {
+  CHECK_GT(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  DPrintf("#%d: ThreadJoin tid=%d\n", thr->tid, tid);
+  ctx->thread_registry.JoinThread(tid, thr);
+}
+
+void ThreadContext::OnJoined(void *arg) {
+  ThreadState *caller_thr = static_cast<ThreadState *>(arg);
+  AcquireImpl(caller_thr, 0, &sync);
+  sync.Reset(&caller_thr->proc()->clock_cache);
+}
+
+void ThreadContext::OnDead() { CHECK_EQ(sync.size(), 0); }
+
+void ThreadDetach(ThreadState *thr, uptr pc, Tid tid) {
+  CHECK_GT(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  ctx->thread_registry.DetachThread(tid, thr);
+}
+
+void ThreadContext::OnDetached(void *arg) {
+  ThreadState *thr1 = static_cast<ThreadState *>(arg);
+  sync.Reset(&thr1->proc()->clock_cache);
+}
+
+void ThreadNotJoined(ThreadState *thr, uptr pc, Tid tid, uptr uid) {
+  CHECK_GT(tid, 0);
+  CHECK_LT(tid, kMaxTid);
+  ctx->thread_registry.SetThreadUserId(tid, uid);
+}
+
+void ThreadSetName(ThreadState *thr, const char *name) {
+  ctx->thread_registry.SetThreadName(thr->tid, name);
+}
+
+#if !SANITIZER_GO
+void FiberSwitchImpl(ThreadState *from, ThreadState *to) {
+  Processor *proc = from->proc();
+  ProcUnwire(proc, from);
+  ProcWire(proc, to);
+  set_cur_thread(to);
+}
+
+ThreadState *FiberCreate(ThreadState *thr, uptr pc, unsigned flags) {
+  void *mem = Alloc(sizeof(ThreadState));
+  ThreadState *fiber = static_cast<ThreadState *>(mem);
+  internal_memset(fiber, 0, sizeof(*fiber));
+  Tid tid = ThreadCreate(thr, pc, 0, true);
+  FiberSwitchImpl(thr, fiber);
+  ThreadStart(fiber, tid, 0, ThreadType::Fiber);
+  FiberSwitchImpl(fiber, thr);
+  return fiber;
+}
+
+void FiberDestroy(ThreadState *thr, uptr pc, ThreadState *fiber) {
+  FiberSwitchImpl(thr, fiber);
+  ThreadFinish(fiber);
+  FiberSwitchImpl(fiber, thr);
+  Free(fiber);
+}
+
+void FiberSwitch(ThreadState *thr, uptr pc,
+                 ThreadState *fiber, unsigned flags) {
+  if (!(flags & FiberSwitchFlagNoSync))
+    Release(thr, pc, (uptr)fiber);
+  FiberSwitchImpl(thr, fiber);
+  if (!(flags & FiberSwitchFlagNoSync))
+    Acquire(fiber, pc, (uptr)fiber);
+}
+#endif
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_shadow.h b/compiler-rt/lib/tsan/rtl-old/tsan_shadow.h
new file mode 100644
index 0000000000000..8b7bc341713e8
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_shadow.h
@@ -0,0 +1,233 @@
+//===-- tsan_shadow.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TSAN_SHADOW_H
+#define TSAN_SHADOW_H
+
+#include "tsan_defs.h"
+#include "tsan_trace.h"
+
+namespace __tsan {
+
+// FastState (from most significant bit):
+//   ignore          : 1
+//   tid             : kTidBits
+//   unused          : -
+//   history_size    : 3
+//   epoch           : kClkBits
+class FastState {
+ public:
+  FastState(u64 tid, u64 epoch) {
+    x_ = tid << kTidShift;
+    x_ |= epoch;
+    DCHECK_EQ(tid, this->tid());
+    DCHECK_EQ(epoch, this->epoch());
+    DCHECK_EQ(GetIgnoreBit(), false);
+  }
+
+  explicit FastState(u64 x) : x_(x) {}
+
+  u64 raw() const { return x_; }
+
+  u64 tid() const {
+    u64 res = (x_ & ~kIgnoreBit) >> kTidShift;
+    return res;
+  }
+
+  u64 TidWithIgnore() const {
+    u64 res = x_ >> kTidShift;
+    return res;
+  }
+
+  u64 epoch() const {
+    u64 res = x_ & ((1ull << kClkBits) - 1);
+    return res;
+  }
+
+  void IncrementEpoch() {
+    u64 old_epoch = epoch();
+    x_ += 1;
+    DCHECK_EQ(old_epoch + 1, epoch());
+    (void)old_epoch;
+  }
+
+  void SetIgnoreBit() { x_ |= kIgnoreBit; }
+  void ClearIgnoreBit() { x_ &= ~kIgnoreBit; }
+  bool GetIgnoreBit() const { return (s64)x_ < 0; }
+
+  void SetHistorySize(int hs) {
+    CHECK_GE(hs, 0);
+    CHECK_LE(hs, 7);
+    x_ = (x_ & ~(kHistoryMask << kHistoryShift)) | (u64(hs) << kHistoryShift);
+  }
+
+  ALWAYS_INLINE
+  int GetHistorySize() const {
+    return (int)((x_ >> kHistoryShift) & kHistoryMask);
+  }
+
+  void ClearHistorySize() { SetHistorySize(0); }
+
+  ALWAYS_INLINE
+  u64 GetTracePos() const {
+    const int hs = GetHistorySize();
+    // When hs == 0, the trace consists of 2 parts.
+    const u64 mask = (1ull << (kTracePartSizeBits + hs + 1)) - 1;
+    return epoch() & mask;
+  }
+
+ private:
+  friend class Shadow;
+  static const int kTidShift = 64 - kTidBits - 1;
+  static const u64 kIgnoreBit = 1ull << 63;
+  static const u64 kFreedBit = 1ull << 63;
+  static const u64 kHistoryShift = kClkBits;
+  static const u64 kHistoryMask = 7;
+  u64 x_;
+};
+
+// Shadow (from most significant bit):
+//   freed           : 1
+//   tid             : kTidBits
+//   is_atomic       : 1
+//   is_read         : 1
+//   size_log        : 2
+//   addr0           : 3
+//   epoch           : kClkBits
+class Shadow : public FastState {
+ public:
+  explicit Shadow(u64 x) : FastState(x) {}
+
+  explicit Shadow(const FastState &s) : FastState(s.x_) { ClearHistorySize(); }
+
+  void SetAddr0AndSizeLog(u64 addr0, unsigned kAccessSizeLog) {
+    DCHECK_EQ((x_ >> kClkBits) & 31, 0);
+    DCHECK_LE(addr0, 7);
+    DCHECK_LE(kAccessSizeLog, 3);
+    x_ |= ((kAccessSizeLog << 3) | addr0) << kClkBits;
+    DCHECK_EQ(kAccessSizeLog, size_log());
+    DCHECK_EQ(addr0, this->addr0());
+  }
+
+  void SetWrite(unsigned kAccessIsWrite) {
+    DCHECK_EQ(x_ & kReadBit, 0);
+    if (!kAccessIsWrite)
+      x_ |= kReadBit;
+    DCHECK_EQ(kAccessIsWrite, IsWrite());
+  }
+
+  void SetAtomic(bool kIsAtomic) {
+    DCHECK(!IsAtomic());
+    if (kIsAtomic)
+      x_ |= kAtomicBit;
+    DCHECK_EQ(IsAtomic(), kIsAtomic);
+  }
+
+  bool IsAtomic() const { return x_ & kAtomicBit; }
+
+  bool IsZero() const { return x_ == 0; }
+
+  static inline bool TidsAreEqual(const Shadow s1, const Shadow s2) {
+    u64 shifted_xor = (s1.x_ ^ s2.x_) >> kTidShift;
+    DCHECK_EQ(shifted_xor == 0, s1.TidWithIgnore() == s2.TidWithIgnore());
+    return shifted_xor == 0;
+  }
+
+  static ALWAYS_INLINE bool Addr0AndSizeAreEqual(const Shadow s1,
+                                                 const Shadow s2) {
+    u64 masked_xor = ((s1.x_ ^ s2.x_) >> kClkBits) & 31;
+    return masked_xor == 0;
+  }
+
+  static ALWAYS_INLINE bool TwoRangesIntersect(Shadow s1, Shadow s2,
+                                               unsigned kS2AccessSize) {
+    bool res = false;
+    u64 
diff  = s1.addr0() - s2.addr0();
+    if ((s64)
diff  < 0) {  // s1.addr0 < s2.addr0
+      // if (s1.addr0() + size1) > s2.addr0()) return true;
+      if (s1.size() > -
diff )
+        res = true;
+    } else {
+      // if (s2.addr0() + kS2AccessSize > s1.addr0()) return true;
+      if (kS2AccessSize > 
diff )
+        res = true;
+    }
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s1, s2));
+    DCHECK_EQ(res, TwoRangesIntersectSlow(s2, s1));
+    return res;
+  }
+
+  u64 ALWAYS_INLINE addr0() const { return (x_ >> kClkBits) & 7; }
+  u64 ALWAYS_INLINE size() const { return 1ull << size_log(); }
+  bool ALWAYS_INLINE IsWrite() const { return !IsRead(); }
+  bool ALWAYS_INLINE IsRead() const { return x_ & kReadBit; }
+
+  // The idea behind the freed bit is as follows.
+  // When the memory is freed (or otherwise unaccessible) we write to the shadow
+  // values with tid/epoch related to the free and the freed bit set.
+  // During memory accesses processing the freed bit is considered
+  // as msb of tid. So any access races with shadow with freed bit set
+  // (it is as if write from a thread with which we never synchronized before).
+  // This allows us to detect accesses to freed memory w/o additional
+  // overheads in memory access processing and at the same time restore
+  // tid/epoch of free.
+  void MarkAsFreed() { x_ |= kFreedBit; }
+
+  bool IsFreed() const { return x_ & kFreedBit; }
+
+  bool GetFreedAndReset() {
+    bool res = x_ & kFreedBit;
+    x_ &= ~kFreedBit;
+    return res;
+  }
+
+  bool ALWAYS_INLINE IsBothReadsOrAtomic(bool kIsWrite, bool kIsAtomic) const {
+    bool v = x_ & ((u64(kIsWrite ^ 1) << kReadShift) |
+                   (u64(kIsAtomic) << kAtomicShift));
+    DCHECK_EQ(v, (!IsWrite() && !kIsWrite) || (IsAtomic() && kIsAtomic));
+    return v;
+  }
+
+  bool ALWAYS_INLINE IsRWNotWeaker(bool kIsWrite, bool kIsAtomic) const {
+    bool v = ((x_ >> kReadShift) & 3) <= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
+    DCHECK_EQ(v, (IsAtomic() < kIsAtomic) ||
+                     (IsAtomic() == kIsAtomic && !IsWrite() <= !kIsWrite));
+    return v;
+  }
+
+  bool ALWAYS_INLINE IsRWWeakerOrEqual(bool kIsWrite, bool kIsAtomic) const {
+    bool v = ((x_ >> kReadShift) & 3) >= u64((kIsWrite ^ 1) | (kIsAtomic << 1));
+    DCHECK_EQ(v, (IsAtomic() > kIsAtomic) ||
+                     (IsAtomic() == kIsAtomic && !IsWrite() >= !kIsWrite));
+    return v;
+  }
+
+ private:
+  static const u64 kReadShift = 5 + kClkBits;
+  static const u64 kReadBit = 1ull << kReadShift;
+  static const u64 kAtomicShift = 6 + kClkBits;
+  static const u64 kAtomicBit = 1ull << kAtomicShift;
+
+  u64 size_log() const { return (x_ >> (3 + kClkBits)) & 3; }
+
+  static bool TwoRangesIntersectSlow(const Shadow s1, const Shadow s2) {
+    if (s1.addr0() == s2.addr0())
+      return true;
+    if (s1.addr0() < s2.addr0() && s1.addr0() + s1.size() > s2.addr0())
+      return true;
+    if (s2.addr0() < s1.addr0() && s2.addr0() + s2.size() > s1.addr0())
+      return true;
+    return false;
+  }
+};
+
+const RawShadow kShadowRodata = (RawShadow)-1;  // .rodata shadow marker
+
+}  // namespace __tsan
+
+#endif

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.cpp
new file mode 100644
index 0000000000000..9bbaafb3a85f5
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.cpp
@@ -0,0 +1,57 @@
+//===-- tsan_stack_trace.cpp ----------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_stack_trace.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+VarSizeStackTrace::VarSizeStackTrace()
+    : StackTrace(nullptr, 0), trace_buffer(nullptr) {}
+
+VarSizeStackTrace::~VarSizeStackTrace() {
+  ResizeBuffer(0);
+}
+
+void VarSizeStackTrace::ResizeBuffer(uptr new_size) {
+  Free(trace_buffer);
+  trace_buffer = (new_size > 0)
+                     ? (uptr *)Alloc(new_size * sizeof(trace_buffer[0]))
+                     : nullptr;
+  trace = trace_buffer;
+  size = new_size;
+}
+
+void VarSizeStackTrace::Init(const uptr *pcs, uptr cnt, uptr extra_top_pc) {
+  ResizeBuffer(cnt + !!extra_top_pc);
+  internal_memcpy(trace_buffer, pcs, cnt * sizeof(trace_buffer[0]));
+  if (extra_top_pc)
+    trace_buffer[cnt] = extra_top_pc;
+}
+
+void VarSizeStackTrace::ReverseOrder() {
+  for (u32 i = 0; i < (size >> 1); i++)
+    Swap(trace_buffer[i], trace_buffer[size - 1 - i]);
+}
+
+}  // namespace __tsan
+
+#if !SANITIZER_GO
+void __sanitizer::BufferedStackTrace::UnwindImpl(
+    uptr pc, uptr bp, void *context, bool request_fast, u32 max_depth) {
+  uptr top = 0;
+  uptr bottom = 0;
+  GetThreadStackTopAndBottom(false, &top, &bottom);
+  bool fast = StackTrace::WillUseFastUnwind(request_fast);
+  Unwind(max_depth, pc, bp, context, top, bottom, fast);
+}
+#endif  // SANITIZER_GO

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h b/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h
new file mode 100644
index 0000000000000..3eb8ce156e835
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_stack_trace.h
@@ -0,0 +1,42 @@
+//===-- tsan_stack_trace.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_STACK_TRACE_H
+#define TSAN_STACK_TRACE_H
+
+#include "sanitizer_common/sanitizer_stacktrace.h"
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+// StackTrace which calls malloc/free to allocate the buffer for
+// addresses in stack traces.
+struct VarSizeStackTrace : public StackTrace {
+  uptr *trace_buffer;  // Owned.
+
+  VarSizeStackTrace();
+  ~VarSizeStackTrace();
+  void Init(const uptr *pcs, uptr cnt, uptr extra_top_pc = 0);
+
+  // Reverses the current stack trace order, the top frame goes to the bottom,
+  // the last frame goes to the top.
+  void ReverseOrder();
+
+ private:
+  void ResizeBuffer(uptr new_size);
+
+  VarSizeStackTrace(const VarSizeStackTrace &);
+  void operator=(const VarSizeStackTrace &);
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_STACK_TRACE_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.cpp
new file mode 100644
index 0000000000000..a1c1bf81bf670
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.cpp
@@ -0,0 +1,161 @@
+//===-- tsan_suppressions.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_libc.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_suppressions.h"
+#include "tsan_suppressions.h"
+#include "tsan_rtl.h"
+#include "tsan_flags.h"
+#include "tsan_mman.h"
+#include "tsan_platform.h"
+
+#if !SANITIZER_GO
+// Suppressions for true/false positives in standard libraries.
+static const char *const std_suppressions =
+// Libstdc++ 4.4 has data races in std::string.
+// See http://crbug.com/181502 for an example.
+"race:^_M_rep$\n"
+"race:^_M_is_leaked$\n"
+// False positive when using std <thread>.
+// Happens because we miss atomic synchronization in libstdc++.
+// See http://llvm.org/bugs/show_bug.cgi?id=17066 for details.
+"race:std::_Sp_counted_ptr_inplace<std::thread::_Impl\n";
+
+// Can be overriden in frontend.
+SANITIZER_WEAK_DEFAULT_IMPL
+const char *__tsan_default_suppressions() {
+  return 0;
+}
+#endif
+
+namespace __tsan {
+
+ALIGNED(64) static char suppression_placeholder[sizeof(SuppressionContext)];
+static SuppressionContext *suppression_ctx = nullptr;
+static const char *kSuppressionTypes[] = {
+    kSuppressionRace,   kSuppressionRaceTop, kSuppressionMutex,
+    kSuppressionThread, kSuppressionSignal, kSuppressionLib,
+    kSuppressionDeadlock};
+
+void InitializeSuppressions() {
+  CHECK_EQ(nullptr, suppression_ctx);
+  suppression_ctx = new (suppression_placeholder)
+      SuppressionContext(kSuppressionTypes, ARRAY_SIZE(kSuppressionTypes));
+  suppression_ctx->ParseFromFile(flags()->suppressions);
+#if !SANITIZER_GO
+  suppression_ctx->Parse(__tsan_default_suppressions());
+  suppression_ctx->Parse(std_suppressions);
+#endif
+}
+
+SuppressionContext *Suppressions() {
+  CHECK(suppression_ctx);
+  return suppression_ctx;
+}
+
+static const char *conv(ReportType typ) {
+  switch (typ) {
+    case ReportTypeRace:
+    case ReportTypeVptrRace:
+    case ReportTypeUseAfterFree:
+    case ReportTypeVptrUseAfterFree:
+    case ReportTypeExternalRace:
+      return kSuppressionRace;
+    case ReportTypeThreadLeak:
+      return kSuppressionThread;
+    case ReportTypeMutexDestroyLocked:
+    case ReportTypeMutexDoubleLock:
+    case ReportTypeMutexInvalidAccess:
+    case ReportTypeMutexBadUnlock:
+    case ReportTypeMutexBadReadLock:
+    case ReportTypeMutexBadReadUnlock:
+      return kSuppressionMutex;
+    case ReportTypeSignalUnsafe:
+    case ReportTypeErrnoInSignal:
+      return kSuppressionSignal;
+    case ReportTypeDeadlock:
+      return kSuppressionDeadlock;
+    // No default case so compiler warns us if we miss one
+  }
+  UNREACHABLE("missing case");
+}
+
+static uptr IsSuppressed(const char *stype, const AddressInfo &info,
+    Suppression **sp) {
+  if (suppression_ctx->Match(info.function, stype, sp) ||
+      suppression_ctx->Match(info.file, stype, sp) ||
+      suppression_ctx->Match(info.module, stype, sp)) {
+    VPrintf(2, "ThreadSanitizer: matched suppression '%s'\n", (*sp)->templ);
+    atomic_fetch_add(&(*sp)->hit_count, 1, memory_order_relaxed);
+    return info.address;
+  }
+  return 0;
+}
+
+uptr IsSuppressed(ReportType typ, const ReportStack *stack, Suppression **sp) {
+  CHECK(suppression_ctx);
+  if (!suppression_ctx->SuppressionCount() || stack == 0 ||
+      !stack->suppressable)
+    return 0;
+  const char *stype = conv(typ);
+  if (0 == internal_strcmp(stype, kSuppressionNone))
+    return 0;
+  for (const SymbolizedStack *frame = stack->frames; frame;
+      frame = frame->next) {
+    uptr pc = IsSuppressed(stype, frame->info, sp);
+    if (pc != 0)
+      return pc;
+  }
+  if (0 == internal_strcmp(stype, kSuppressionRace) && stack->frames != nullptr)
+    return IsSuppressed(kSuppressionRaceTop, stack->frames->info, sp);
+  return 0;
+}
+
+uptr IsSuppressed(ReportType typ, const ReportLocation *loc, Suppression **sp) {
+  CHECK(suppression_ctx);
+  if (!suppression_ctx->SuppressionCount() || loc == 0 ||
+      loc->type != ReportLocationGlobal || !loc->suppressable)
+    return 0;
+  const char *stype = conv(typ);
+  if (0 == internal_strcmp(stype, kSuppressionNone))
+    return 0;
+  Suppression *s;
+  const DataInfo &global = loc->global;
+  if (suppression_ctx->Match(global.name, stype, &s) ||
+      suppression_ctx->Match(global.module, stype, &s)) {
+      VPrintf(2, "ThreadSanitizer: matched suppression '%s'\n", s->templ);
+      atomic_fetch_add(&s->hit_count, 1, memory_order_relaxed);
+      *sp = s;
+      return global.start;
+  }
+  return 0;
+}
+
+void PrintMatchedSuppressions() {
+  InternalMmapVector<Suppression *> matched;
+  CHECK(suppression_ctx);
+  suppression_ctx->GetMatched(&matched);
+  if (!matched.size())
+    return;
+  int hit_count = 0;
+  for (uptr i = 0; i < matched.size(); i++)
+    hit_count += atomic_load_relaxed(&matched[i]->hit_count);
+  Printf("ThreadSanitizer: Matched %d suppressions (pid=%d):\n", hit_count,
+         (int)internal_getpid());
+  for (uptr i = 0; i < matched.size(); i++) {
+    Printf("%d %s:%s\n", atomic_load_relaxed(&matched[i]->hit_count),
+           matched[i]->type, matched[i]->templ);
+  }
+}
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h b/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h
new file mode 100644
index 0000000000000..f430aeb6c4cf1
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_suppressions.h
@@ -0,0 +1,37 @@
+//===-- tsan_suppressions.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SUPPRESSIONS_H
+#define TSAN_SUPPRESSIONS_H
+
+#include "sanitizer_common/sanitizer_suppressions.h"
+#include "tsan_report.h"
+
+namespace __tsan {
+
+const char kSuppressionNone[] = "none";
+const char kSuppressionRace[] = "race";
+const char kSuppressionRaceTop[] = "race_top";
+const char kSuppressionMutex[] = "mutex";
+const char kSuppressionThread[] = "thread";
+const char kSuppressionSignal[] = "signal";
+const char kSuppressionLib[] = "called_from_lib";
+const char kSuppressionDeadlock[] = "deadlock";
+
+void InitializeSuppressions();
+SuppressionContext *Suppressions();
+void PrintMatchedSuppressions();
+uptr IsSuppressed(ReportType typ, const ReportStack *stack, Suppression **sp);
+uptr IsSuppressed(ReportType typ, const ReportLocation *loc, Suppression **sp);
+
+}  // namespace __tsan
+
+#endif  // TSAN_SUPPRESSIONS_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.cpp
new file mode 100644
index 0000000000000..2e2744d2eae78
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.cpp
@@ -0,0 +1,123 @@
+//===-- tsan_symbolize.cpp ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+
+#include "tsan_symbolize.h"
+
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "sanitizer_common/sanitizer_symbolizer.h"
+#include "tsan_flags.h"
+#include "tsan_report.h"
+#include "tsan_rtl.h"
+
+namespace __tsan {
+
+void EnterSymbolizer() {
+  ThreadState *thr = cur_thread();
+  CHECK(!thr->in_symbolizer);
+  thr->in_symbolizer = true;
+  thr->ignore_interceptors++;
+}
+
+void ExitSymbolizer() {
+  ThreadState *thr = cur_thread();
+  CHECK(thr->in_symbolizer);
+  thr->in_symbolizer = false;
+  thr->ignore_interceptors--;
+}
+
+// Legacy API.
+// May be overriden by JIT/JAVA/etc,
+// whatever produces PCs marked with kExternalPCBit.
+SANITIZER_WEAK_DEFAULT_IMPL
+bool __tsan_symbolize_external(uptr pc, char *func_buf, uptr func_siz,
+                               char *file_buf, uptr file_siz, int *line,
+                               int *col) {
+  return false;
+}
+
+// New API: call __tsan_symbolize_external_ex only when it exists.
+// Once old clients are gone, provide dummy implementation.
+SANITIZER_WEAK_DEFAULT_IMPL
+void __tsan_symbolize_external_ex(uptr pc,
+                                  void (*add_frame)(void *, const char *,
+                                                    const char *, int, int),
+                                  void *ctx) {}
+
+struct SymbolizedStackBuilder {
+  SymbolizedStack *head;
+  SymbolizedStack *tail;
+  uptr addr;
+};
+
+static void AddFrame(void *ctx, const char *function_name, const char *file,
+                     int line, int column) {
+  SymbolizedStackBuilder *ssb = (struct SymbolizedStackBuilder *)ctx;
+  if (ssb->tail) {
+    ssb->tail->next = SymbolizedStack::New(ssb->addr);
+    ssb->tail = ssb->tail->next;
+  } else {
+    ssb->head = ssb->tail = SymbolizedStack::New(ssb->addr);
+  }
+  AddressInfo *info = &ssb->tail->info;
+  if (function_name) {
+    info->function = internal_strdup(function_name);
+  }
+  if (file) {
+    info->file = internal_strdup(file);
+  }
+  info->line = line;
+  info->column = column;
+}
+
+SymbolizedStack *SymbolizeCode(uptr addr) {
+  // Check if PC comes from non-native land.
+  if (addr & kExternalPCBit) {
+    SymbolizedStackBuilder ssb = {nullptr, nullptr, addr};
+    __tsan_symbolize_external_ex(addr, AddFrame, &ssb);
+    if (ssb.head)
+      return ssb.head;
+    // Legacy code: remove along with the declaration above
+    // once all clients using this API are gone.
+    // Declare static to not consume too much stack space.
+    // We symbolize reports in a single thread, so this is fine.
+    static char func_buf[1024];
+    static char file_buf[1024];
+    int line, col;
+    SymbolizedStack *frame = SymbolizedStack::New(addr);
+    if (__tsan_symbolize_external(addr, func_buf, sizeof(func_buf), file_buf,
+                                  sizeof(file_buf), &line, &col)) {
+      frame->info.function = internal_strdup(func_buf);
+      frame->info.file = internal_strdup(file_buf);
+      frame->info.line = line;
+      frame->info.column = col;
+    }
+    return frame;
+  }
+  return Symbolizer::GetOrInit()->SymbolizePC(addr);
+}
+
+ReportLocation *SymbolizeData(uptr addr) {
+  DataInfo info;
+  if (!Symbolizer::GetOrInit()->SymbolizeData(addr, &info))
+    return 0;
+  auto *ent = New<ReportLocation>();
+  ent->type = ReportLocationGlobal;
+  internal_memcpy(&ent->global, &info, sizeof(info));
+  return ent;
+}
+
+void SymbolizeFlush() {
+  Symbolizer::GetOrInit()->Flush();
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h b/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h
new file mode 100644
index 0000000000000..7adaa04dc273e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_symbolize.h
@@ -0,0 +1,30 @@
+//===-- tsan_symbolize.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SYMBOLIZE_H
+#define TSAN_SYMBOLIZE_H
+
+#include "tsan_defs.h"
+#include "tsan_report.h"
+
+namespace __tsan {
+
+void EnterSymbolizer();
+void ExitSymbolizer();
+SymbolizedStack *SymbolizeCode(uptr addr);
+ReportLocation *SymbolizeData(uptr addr);
+void SymbolizeFlush();
+
+ReportStack *NewReportStackEntry(uptr addr);
+
+}  // namespace __tsan
+
+#endif  // TSAN_SYMBOLIZE_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_sync.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_sync.cpp
new file mode 100644
index 0000000000000..f042abab74e5e
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_sync.cpp
@@ -0,0 +1,279 @@
+//===-- tsan_sync.cpp -----------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_sync.h"
+#include "tsan_rtl.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+void DDMutexInit(ThreadState *thr, uptr pc, SyncVar *s);
+
+SyncVar::SyncVar() : mtx(MutexTypeSyncVar) { Reset(0); }
+
+void SyncVar::Init(ThreadState *thr, uptr pc, uptr addr, u64 uid,
+                   bool save_stack) {
+  this->addr = addr;
+  this->uid = uid;
+  this->next = 0;
+
+  creation_stack_id = kInvalidStackID;
+  if (save_stack && !SANITIZER_GO)  // Go does not use them
+    creation_stack_id = CurrentStackId(thr, pc);
+  if (common_flags()->detect_deadlocks)
+    DDMutexInit(thr, pc, this);
+}
+
+void SyncVar::Reset(Processor *proc) {
+  uid = 0;
+  creation_stack_id = kInvalidStackID;
+  owner_tid = kInvalidTid;
+  last_lock = 0;
+  recursion = 0;
+  atomic_store_relaxed(&flags, 0);
+
+  if (proc == 0) {
+    CHECK_EQ(clock.size(), 0);
+    CHECK_EQ(read_clock.size(), 0);
+  } else {
+    clock.Reset(&proc->clock_cache);
+    read_clock.Reset(&proc->clock_cache);
+  }
+}
+
+MetaMap::MetaMap()
+    : block_alloc_(LINKER_INITIALIZED, "heap block allocator"),
+      sync_alloc_(LINKER_INITIALIZED, "sync allocator") {
+  atomic_store(&uid_gen_, 0, memory_order_relaxed);
+}
+
+void MetaMap::AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz) {
+  u32 idx = block_alloc_.Alloc(&thr->proc()->block_cache);
+  MBlock *b = block_alloc_.Map(idx);
+  b->siz = sz;
+  b->tag = 0;
+  b->tid = thr->tid;
+  b->stk = CurrentStackId(thr, pc);
+  u32 *meta = MemToMeta(p);
+  DCHECK_EQ(*meta, 0);
+  *meta = idx | kFlagBlock;
+}
+
+uptr MetaMap::FreeBlock(Processor *proc, uptr p) {
+  MBlock* b = GetBlock(p);
+  if (b == 0)
+    return 0;
+  uptr sz = RoundUpTo(b->siz, kMetaShadowCell);
+  FreeRange(proc, p, sz);
+  return sz;
+}
+
+bool MetaMap::FreeRange(Processor *proc, uptr p, uptr sz) {
+  bool has_something = false;
+  u32 *meta = MemToMeta(p);
+  u32 *end = MemToMeta(p + sz);
+  if (end == meta)
+    end++;
+  for (; meta < end; meta++) {
+    u32 idx = *meta;
+    if (idx == 0) {
+      // Note: don't write to meta in this case -- the block can be huge.
+      continue;
+    }
+    *meta = 0;
+    has_something = true;
+    while (idx != 0) {
+      if (idx & kFlagBlock) {
+        block_alloc_.Free(&proc->block_cache, idx & ~kFlagMask);
+        break;
+      } else if (idx & kFlagSync) {
+        DCHECK(idx & kFlagSync);
+        SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
+        u32 next = s->next;
+        s->Reset(proc);
+        sync_alloc_.Free(&proc->sync_cache, idx & ~kFlagMask);
+        idx = next;
+      } else {
+        CHECK(0);
+      }
+    }
+  }
+  return has_something;
+}
+
+// ResetRange removes all meta objects from the range.
+// It is called for large mmap-ed regions. The function is best-effort wrt
+// freeing of meta objects, because we don't want to page in the whole range
+// which can be huge. The function probes pages one-by-one until it finds a page
+// without meta objects, at this point it stops freeing meta objects. Because
+// thread stacks grow top-down, we do the same starting from end as well.
+void MetaMap::ResetRange(Processor *proc, uptr p, uptr sz) {
+  if (SANITIZER_GO) {
+    // UnmapOrDie/MmapFixedNoReserve does not work on Windows,
+    // so we do the optimization only for C/C++.
+    FreeRange(proc, p, sz);
+    return;
+  }
+  const uptr kMetaRatio = kMetaShadowCell / kMetaShadowSize;
+  const uptr kPageSize = GetPageSizeCached() * kMetaRatio;
+  if (sz <= 4 * kPageSize) {
+    // If the range is small, just do the normal free procedure.
+    FreeRange(proc, p, sz);
+    return;
+  }
+  // First, round both ends of the range to page size.
+  uptr 
diff  = RoundUp(p, kPageSize) - p;
+  if (
diff  != 0) {
+    FreeRange(proc, p, 
diff );
+    p += 
diff ;
+    sz -= 
diff ;
+  }
+  
diff  = p + sz - RoundDown(p + sz, kPageSize);
+  if (
diff  != 0) {
+    FreeRange(proc, p + sz - 
diff , 
diff );
+    sz -= 
diff ;
+  }
+  // Now we must have a non-empty page-aligned range.
+  CHECK_GT(sz, 0);
+  CHECK_EQ(p, RoundUp(p, kPageSize));
+  CHECK_EQ(sz, RoundUp(sz, kPageSize));
+  const uptr p0 = p;
+  const uptr sz0 = sz;
+  // Probe start of the range.
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
+    bool has_something = FreeRange(proc, p, kPageSize);
+    p += kPageSize;
+    sz -= kPageSize;
+    if (!has_something && checked > (128 << 10))
+      break;
+  }
+  // Probe end of the range.
+  for (uptr checked = 0; sz > 0; checked += kPageSize) {
+    bool has_something = FreeRange(proc, p + sz - kPageSize, kPageSize);
+    sz -= kPageSize;
+    // Stacks grow down, so sync object are most likely at the end of the region
+    // (if it is a stack). The very end of the stack is TLS and tsan increases
+    // TLS by at least 256K, so check at least 512K.
+    if (!has_something && checked > (512 << 10))
+      break;
+  }
+  // Finally, page out the whole range (including the parts that we've just
+  // freed). Note: we can't simply madvise, because we need to leave a zeroed
+  // range (otherwise __tsan_java_move can crash if it encounters a left-over
+  // meta objects in java heap).
+  uptr metap = (uptr)MemToMeta(p0);
+  uptr metasz = sz0 / kMetaRatio;
+  UnmapOrDie((void*)metap, metasz);
+  if (!MmapFixedSuperNoReserve(metap, metasz))
+    Die();
+}
+
+MBlock* MetaMap::GetBlock(uptr p) {
+  u32 *meta = MemToMeta(p);
+  u32 idx = *meta;
+  for (;;) {
+    if (idx == 0)
+      return 0;
+    if (idx & kFlagBlock)
+      return block_alloc_.Map(idx & ~kFlagMask);
+    DCHECK(idx & kFlagSync);
+    SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
+    idx = s->next;
+  }
+}
+
+SyncVar *MetaMap::GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
+                          bool save_stack) {
+  u32 *meta = MemToMeta(addr);
+  u32 idx0 = *meta;
+  u32 myidx = 0;
+  SyncVar *mys = nullptr;
+  for (;;) {
+    for (u32 idx = idx0; idx && !(idx & kFlagBlock);) {
+      DCHECK(idx & kFlagSync);
+      SyncVar * s = sync_alloc_.Map(idx & ~kFlagMask);
+      if (LIKELY(s->addr == addr)) {
+        if (UNLIKELY(myidx != 0)) {
+          mys->Reset(thr->proc());
+          sync_alloc_.Free(&thr->proc()->sync_cache, myidx);
+        }
+        return s;
+      }
+      idx = s->next;
+    }
+    if (!create)
+      return nullptr;
+    if (UNLIKELY(*meta != idx0)) {
+      idx0 = *meta;
+      continue;
+    }
+
+    if (LIKELY(myidx == 0)) {
+      const u64 uid = atomic_fetch_add(&uid_gen_, 1, memory_order_relaxed);
+      myidx = sync_alloc_.Alloc(&thr->proc()->sync_cache);
+      mys = sync_alloc_.Map(myidx);
+      mys->Init(thr, pc, addr, uid, save_stack);
+    }
+    mys->next = idx0;
+    if (atomic_compare_exchange_strong((atomic_uint32_t*)meta, &idx0,
+        myidx | kFlagSync, memory_order_release)) {
+      return mys;
+    }
+  }
+}
+
+void MetaMap::MoveMemory(uptr src, uptr dst, uptr sz) {
+  // src and dst can overlap,
+  // there are no concurrent accesses to the regions (e.g. stop-the-world).
+  CHECK_NE(src, dst);
+  CHECK_NE(sz, 0);
+  uptr 
diff  = dst - src;
+  u32 *src_meta = MemToMeta(src);
+  u32 *dst_meta = MemToMeta(dst);
+  u32 *src_meta_end = MemToMeta(src + sz);
+  uptr inc = 1;
+  if (dst > src) {
+    src_meta = MemToMeta(src + sz) - 1;
+    dst_meta = MemToMeta(dst + sz) - 1;
+    src_meta_end = MemToMeta(src) - 1;
+    inc = -1;
+  }
+  for (; src_meta != src_meta_end; src_meta += inc, dst_meta += inc) {
+    CHECK_EQ(*dst_meta, 0);
+    u32 idx = *src_meta;
+    *src_meta = 0;
+    *dst_meta = idx;
+    // Patch the addresses in sync objects.
+    while (idx != 0) {
+      if (idx & kFlagBlock)
+        break;
+      CHECK(idx & kFlagSync);
+      SyncVar *s = sync_alloc_.Map(idx & ~kFlagMask);
+      s->addr += 
diff ;
+      idx = s->next;
+    }
+  }
+}
+
+void MetaMap::OnProcIdle(Processor *proc) {
+  block_alloc_.FlushCache(&proc->block_cache);
+  sync_alloc_.FlushCache(&proc->sync_cache);
+}
+
+MetaMap::MemoryStats MetaMap::GetMemoryStats() const {
+  MemoryStats stats;
+  stats.mem_block = block_alloc_.AllocatedMemory();
+  stats.sync_obj = sync_alloc_.AllocatedMemory();
+  return stats;
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_sync.h b/compiler-rt/lib/tsan/rtl-old/tsan_sync.h
new file mode 100644
index 0000000000000..fc8fa288a8418
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_sync.h
@@ -0,0 +1,153 @@
+//===-- tsan_sync.h ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_SYNC_H
+#define TSAN_SYNC_H
+
+#include "sanitizer_common/sanitizer_atomic.h"
+#include "sanitizer_common/sanitizer_common.h"
+#include "sanitizer_common/sanitizer_deadlock_detector_interface.h"
+#include "tsan_defs.h"
+#include "tsan_clock.h"
+#include "tsan_dense_alloc.h"
+
+namespace __tsan {
+
+// These need to match __tsan_mutex_* flags defined in tsan_interface.h.
+// See documentation there as well.
+enum MutexFlags {
+  MutexFlagLinkerInit          = 1 << 0, // __tsan_mutex_linker_init
+  MutexFlagWriteReentrant      = 1 << 1, // __tsan_mutex_write_reentrant
+  MutexFlagReadReentrant       = 1 << 2, // __tsan_mutex_read_reentrant
+  MutexFlagReadLock            = 1 << 3, // __tsan_mutex_read_lock
+  MutexFlagTryLock             = 1 << 4, // __tsan_mutex_try_lock
+  MutexFlagTryLockFailed       = 1 << 5, // __tsan_mutex_try_lock_failed
+  MutexFlagRecursiveLock       = 1 << 6, // __tsan_mutex_recursive_lock
+  MutexFlagRecursiveUnlock     = 1 << 7, // __tsan_mutex_recursive_unlock
+  MutexFlagNotStatic           = 1 << 8, // __tsan_mutex_not_static
+
+  // The following flags are runtime private.
+  // Mutex API misuse was detected, so don't report any more.
+  MutexFlagBroken              = 1 << 30,
+  // We did not intercept pre lock event, so handle it on post lock.
+  MutexFlagDoPreLockOnPostLock = 1 << 29,
+  // Must list all mutex creation flags.
+  MutexCreationFlagMask        = MutexFlagLinkerInit |
+                                 MutexFlagWriteReentrant |
+                                 MutexFlagReadReentrant |
+                                 MutexFlagNotStatic,
+};
+
+// SyncVar is a descriptor of a user synchronization object
+// (mutex or an atomic variable).
+struct SyncVar {
+  SyncVar();
+
+  uptr addr;  // overwritten by DenseSlabAlloc freelist
+  Mutex mtx;
+  u64 uid;  // Globally unique id.
+  StackID creation_stack_id;
+  Tid owner_tid;  // Set only by exclusive owners.
+  u64 last_lock;
+  int recursion;
+  atomic_uint32_t flags;
+  u32 next;  // in MetaMap
+  DDMutex dd;
+  SyncClock read_clock;  // Used for rw mutexes only.
+  // The clock is placed last, so that it is situated on a 
diff erent cache line
+  // with the mtx. This reduces contention for hot sync objects.
+  SyncClock clock;
+
+  void Init(ThreadState *thr, uptr pc, uptr addr, u64 uid, bool save_stack);
+  void Reset(Processor *proc);
+
+  u64 GetId() const {
+    // 48 lsb is addr, then 14 bits is low part of uid, then 2 zero bits.
+    return GetLsb((u64)addr | (uid << 48), 60);
+  }
+  bool CheckId(u64 uid) const {
+    CHECK_EQ(uid, GetLsb(uid, 14));
+    return GetLsb(this->uid, 14) == uid;
+  }
+  static uptr SplitId(u64 id, u64 *uid) {
+    *uid = id >> 48;
+    return (uptr)GetLsb(id, 48);
+  }
+
+  bool IsFlagSet(u32 f) const {
+    return atomic_load_relaxed(&flags) & f;
+  }
+
+  void SetFlags(u32 f) {
+    atomic_store_relaxed(&flags, atomic_load_relaxed(&flags) | f);
+  }
+
+  void UpdateFlags(u32 flagz) {
+    // Filter out operation flags.
+    if (!(flagz & MutexCreationFlagMask))
+      return;
+    u32 current = atomic_load_relaxed(&flags);
+    if (current & MutexCreationFlagMask)
+      return;
+    // Note: this can be called from MutexPostReadLock which holds only read
+    // lock on the SyncVar.
+    atomic_store_relaxed(&flags, current | (flagz & MutexCreationFlagMask));
+  }
+};
+
+// MetaMap maps app addresses to heap block (MBlock) and sync var (SyncVar)
+// descriptors. It uses 1/2 direct shadow, see tsan_platform.h for the mapping.
+class MetaMap {
+ public:
+  MetaMap();
+
+  void AllocBlock(ThreadState *thr, uptr pc, uptr p, uptr sz);
+  uptr FreeBlock(Processor *proc, uptr p);
+  bool FreeRange(Processor *proc, uptr p, uptr sz);
+  void ResetRange(Processor *proc, uptr p, uptr sz);
+  MBlock* GetBlock(uptr p);
+
+  SyncVar *GetSyncOrCreate(ThreadState *thr, uptr pc, uptr addr,
+                           bool save_stack) {
+    return GetSync(thr, pc, addr, true, save_stack);
+  }
+  SyncVar *GetSyncIfExists(uptr addr) {
+    return GetSync(nullptr, 0, addr, false, false);
+  }
+
+  void MoveMemory(uptr src, uptr dst, uptr sz);
+
+  void OnProcIdle(Processor *proc);
+
+  struct MemoryStats {
+    uptr mem_block;
+    uptr sync_obj;
+  };
+
+  MemoryStats GetMemoryStats() const;
+
+ private:
+  static const u32 kFlagMask  = 3u << 30;
+  static const u32 kFlagBlock = 1u << 30;
+  static const u32 kFlagSync  = 2u << 30;
+  typedef DenseSlabAlloc<MBlock, 1 << 18, 1 << 12, kFlagMask> BlockAlloc;
+  typedef DenseSlabAlloc<SyncVar, 1 << 20, 1 << 10, kFlagMask> SyncAlloc;
+  BlockAlloc block_alloc_;
+  SyncAlloc sync_alloc_;
+  atomic_uint64_t uid_gen_;
+
+  SyncVar *GetSync(ThreadState *thr, uptr pc, uptr addr, bool create,
+                   bool save_stack);
+};
+
+}  // namespace __tsan
+
+#endif  // TSAN_SYNC_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_trace.h b/compiler-rt/lib/tsan/rtl-old/tsan_trace.h
new file mode 100644
index 0000000000000..ffc8c991ece0b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_trace.h
@@ -0,0 +1,252 @@
+//===-- tsan_trace.h --------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_TRACE_H
+#define TSAN_TRACE_H
+
+#include "tsan_defs.h"
+#include "tsan_ilist.h"
+#include "tsan_mutexset.h"
+#include "tsan_stack_trace.h"
+
+namespace __tsan {
+
+const int kTracePartSizeBits = 13;
+const int kTracePartSize = 1 << kTracePartSizeBits;
+const int kTraceParts = 2 * 1024 * 1024 / kTracePartSize;
+const int kTraceSize = kTracePartSize * kTraceParts;
+
+// Must fit into 3 bits.
+enum EventType {
+  EventTypeMop,
+  EventTypeFuncEnter,
+  EventTypeFuncExit,
+  EventTypeLock,
+  EventTypeUnlock,
+  EventTypeRLock,
+  EventTypeRUnlock
+};
+
+// Represents a thread event (from most significant bit):
+// u64 typ  : 3;   // EventType.
+// u64 addr : 61;  // Associated pc.
+typedef u64 Event;
+
+const uptr kEventPCBits = 61;
+
+struct TraceHeader {
+#if !SANITIZER_GO
+  BufferedStackTrace stack0;  // Start stack for the trace.
+#else
+  VarSizeStackTrace stack0;
+#endif
+  u64        epoch0;  // Start epoch for the trace.
+  MutexSet   mset0;
+
+  TraceHeader() : stack0(), epoch0() {}
+};
+
+struct Trace {
+  Mutex mtx;
+#if !SANITIZER_GO
+  // Must be last to catch overflow as paging fault.
+  // Go shadow stack is dynamically allocated.
+  uptr shadow_stack[kShadowStackSize];
+#endif
+  // Must be the last field, because we unmap the unused part in
+  // CreateThreadContext.
+  TraceHeader headers[kTraceParts];
+
+  Trace() : mtx(MutexTypeTrace) {}
+};
+
+namespace v3 {
+
+enum class EventType : u64 {
+  kAccessExt,
+  kAccessRange,
+  kLock,
+  kRLock,
+  kUnlock,
+  kTime,
+};
+
+// "Base" type for all events for type dispatch.
+struct Event {
+  // We use variable-length type encoding to give more bits to some event
+  // types that need them. If is_access is set, this is EventAccess.
+  // Otherwise, if is_func is set, this is EventFunc.
+  // Otherwise type denotes the type.
+  u64 is_access : 1;
+  u64 is_func : 1;
+  EventType type : 3;
+  u64 _ : 59;
+};
+static_assert(sizeof(Event) == 8, "bad Event size");
+
+// Nop event used as padding and does not affect state during replay.
+static constexpr Event NopEvent = {1, 0, EventType::kAccessExt, 0};
+
+// Compressed memory access can represent only some events with PCs
+// close enough to each other. Otherwise we fall back to EventAccessExt.
+struct EventAccess {
+  static constexpr uptr kPCBits = 15;
+  static_assert(kPCBits + kCompressedAddrBits + 5 == 64,
+                "unused bits in EventAccess");
+
+  u64 is_access : 1;  // = 1
+  u64 is_read : 1;
+  u64 is_atomic : 1;
+  u64 size_log : 2;
+  u64 pc_delta : kPCBits;  // signed delta from the previous memory access PC
+  u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventAccess) == 8, "bad EventAccess size");
+
+// Function entry (pc != 0) or exit (pc == 0).
+struct EventFunc {
+  u64 is_access : 1;  // = 0
+  u64 is_func : 1;    // = 1
+  u64 pc : 62;
+};
+static_assert(sizeof(EventFunc) == 8, "bad EventFunc size");
+
+// Extended memory access with full PC.
+struct EventAccessExt {
+  // Note: precisely specifying the unused parts of the bitfield is critical for
+  // performance. If we don't specify them, compiler will generate code to load
+  // the old value and shuffle it to extract the unused bits to apply to the new
+  // value. If we specify the unused part and store 0 in there, all that
+  // unnecessary code goes away (store of the 0 const is combined with other
+  // constant parts).
+  static constexpr uptr kUnusedBits = 11;
+  static_assert(kCompressedAddrBits + kUnusedBits + 9 == 64,
+                "unused bits in EventAccessExt");
+
+  u64 is_access : 1;   // = 0
+  u64 is_func : 1;     // = 0
+  EventType type : 3;  // = EventType::kAccessExt
+  u64 is_read : 1;
+  u64 is_atomic : 1;
+  u64 size_log : 2;
+  u64 _ : kUnusedBits;
+  u64 addr : kCompressedAddrBits;
+  u64 pc;
+};
+static_assert(sizeof(EventAccessExt) == 16, "bad EventAccessExt size");
+
+// Access to a memory range.
+struct EventAccessRange {
+  static constexpr uptr kSizeLoBits = 13;
+  static_assert(kCompressedAddrBits + kSizeLoBits + 7 == 64,
+                "unused bits in EventAccessRange");
+
+  u64 is_access : 1;   // = 0
+  u64 is_func : 1;     // = 0
+  EventType type : 3;  // = EventType::kAccessRange
+  u64 is_read : 1;
+  u64 is_free : 1;
+  u64 size_lo : kSizeLoBits;
+  u64 pc : kCompressedAddrBits;
+  u64 addr : kCompressedAddrBits;
+  u64 size_hi : 64 - kCompressedAddrBits;
+};
+static_assert(sizeof(EventAccessRange) == 16, "bad EventAccessRange size");
+
+// Mutex lock.
+struct EventLock {
+  static constexpr uptr kStackIDLoBits = 15;
+  static constexpr uptr kStackIDHiBits =
+      sizeof(StackID) * kByteBits - kStackIDLoBits;
+  static constexpr uptr kUnusedBits = 3;
+  static_assert(kCompressedAddrBits + kStackIDLoBits + 5 == 64,
+                "unused bits in EventLock");
+  static_assert(kCompressedAddrBits + kStackIDHiBits + kUnusedBits == 64,
+                "unused bits in EventLock");
+
+  u64 is_access : 1;   // = 0
+  u64 is_func : 1;     // = 0
+  EventType type : 3;  // = EventType::kLock or EventType::kRLock
+  u64 pc : kCompressedAddrBits;
+  u64 stack_lo : kStackIDLoBits;
+  u64 stack_hi : sizeof(StackID) * kByteBits - kStackIDLoBits;
+  u64 _ : kUnusedBits;
+  u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventLock) == 16, "bad EventLock size");
+
+// Mutex unlock.
+struct EventUnlock {
+  static constexpr uptr kUnusedBits = 15;
+  static_assert(kCompressedAddrBits + kUnusedBits + 5 == 64,
+                "unused bits in EventUnlock");
+
+  u64 is_access : 1;   // = 0
+  u64 is_func : 1;     // = 0
+  EventType type : 3;  // = EventType::kUnlock
+  u64 _ : kUnusedBits;
+  u64 addr : kCompressedAddrBits;
+};
+static_assert(sizeof(EventUnlock) == 8, "bad EventUnlock size");
+
+// Time change event.
+struct EventTime {
+  static constexpr uptr kUnusedBits = 37;
+  static_assert(kUnusedBits + sizeof(Sid) * kByteBits + kEpochBits + 5 == 64,
+                "unused bits in EventTime");
+
+  u64 is_access : 1;   // = 0
+  u64 is_func : 1;     // = 0
+  EventType type : 3;  // = EventType::kTime
+  u64 sid : sizeof(Sid) * kByteBits;
+  u64 epoch : kEpochBits;
+  u64 _ : kUnusedBits;
+};
+static_assert(sizeof(EventTime) == 8, "bad EventTime size");
+
+struct Trace;
+
+struct TraceHeader {
+  Trace* trace = nullptr;  // back-pointer to Trace containing this part
+  INode trace_parts;       // in Trace::parts
+};
+
+struct TracePart : TraceHeader {
+  // There are a lot of goroutines in Go, so we use smaller parts.
+  static constexpr uptr kByteSize = (SANITIZER_GO ? 128 : 256) << 10;
+  static constexpr uptr kSize =
+      (kByteSize - sizeof(TraceHeader)) / sizeof(Event);
+  // TraceAcquire does a fast event pointer overflow check by comparing
+  // pointer into TracePart::events with kAlignment mask. Since TracePart's
+  // are allocated page-aligned, this check detects end of the array
+  // (it also have false positives in the middle that are filtered separately).
+  // This also requires events to be the last field.
+  static constexpr uptr kAlignment = 0xff0;
+  Event events[kSize];
+
+  TracePart() {}
+};
+static_assert(sizeof(TracePart) == TracePart::kByteSize, "bad TracePart size");
+
+struct Trace {
+  Mutex mtx;
+  IList<TraceHeader, &TraceHeader::trace_parts, TracePart> parts;
+  Event* final_pos =
+      nullptr;  // final position in the last part for finished threads
+
+  Trace() : mtx(MutexTypeTrace) {}
+};
+
+}  // namespace v3
+
+}  // namespace __tsan
+
+#endif  // TSAN_TRACE_H

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_update_shadow_word.inc b/compiler-rt/lib/tsan/rtl-old/tsan_update_shadow_word.inc
new file mode 100644
index 0000000000000..a58ef0f17efa1
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_update_shadow_word.inc
@@ -0,0 +1,59 @@
+//===-- tsan_update_shadow_word.inc -----------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// Body of the hottest inner loop.
+// If we wrap this body into a function, compilers (both gcc and clang)
+// produce sligtly less efficient code.
+//===----------------------------------------------------------------------===//
+do {
+  const unsigned kAccessSize = 1 << kAccessSizeLog;
+  u64 *sp = &shadow_mem[idx];
+  old = LoadShadow(sp);
+  if (LIKELY(old.IsZero())) {
+    if (!stored) {
+      StoreIfNotYetStored(sp, &store_word);
+      stored = true;
+    }
+    break;
+  }
+  // is the memory access equal to the previous?
+  if (LIKELY(Shadow::Addr0AndSizeAreEqual(cur, old))) {
+    // same thread?
+    if (LIKELY(Shadow::TidsAreEqual(old, cur))) {
+      if (LIKELY(old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic))) {
+        StoreIfNotYetStored(sp, &store_word);
+        stored = true;
+      }
+      break;
+    }
+    if (HappensBefore(old, thr)) {
+      if (old.IsRWWeakerOrEqual(kAccessIsWrite, kIsAtomic)) {
+        StoreIfNotYetStored(sp, &store_word);
+        stored = true;
+      }
+      break;
+    }
+    if (LIKELY(old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic)))
+      break;
+    goto RACE;
+  }
+  // Do the memory access intersect?
+  if (Shadow::TwoRangesIntersect(old, cur, kAccessSize)) {
+    if (Shadow::TidsAreEqual(old, cur))
+      break;
+    if (old.IsBothReadsOrAtomic(kAccessIsWrite, kIsAtomic))
+      break;
+    if (LIKELY(HappensBefore(old, thr)))
+      break;
+    goto RACE;
+  }
+  // The accesses do not intersect.
+  break;
+} while (0);

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.cpp b/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.cpp
new file mode 100644
index 0000000000000..278298565d3f8
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.cpp
@@ -0,0 +1,126 @@
+//===-- tsan_vector_clock.cpp ---------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#include "tsan_vector_clock.h"
+
+#include "sanitizer_common/sanitizer_placement_new.h"
+#include "tsan_mman.h"
+
+namespace __tsan {
+
+#if TSAN_VECTORIZE
+const uptr kVectorClockSize = kThreadSlotCount * sizeof(Epoch) / sizeof(m128);
+#endif
+
+VectorClock::VectorClock() { Reset(); }
+
+void VectorClock::Reset() {
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    clk_[i] = kEpochZero;
+#else
+  m128 z = _mm_setzero_si128();
+  m128* vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) _mm_store_si128(&vclk[i], z);
+#endif
+}
+
+void VectorClock::Acquire(const VectorClock* src) {
+  if (!src)
+    return;
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    clk_[i] = max(clk_[i], src->clk_[i]);
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+  m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(src->clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 s = _mm_load_si128(&vsrc[i]);
+    m128 d = _mm_load_si128(&vdst[i]);
+    m128 m = _mm_max_epu16(s, d);
+    _mm_store_si128(&vdst[i], m);
+  }
+#endif
+}
+
+static VectorClock* AllocClock(VectorClock** dstp) {
+  if (UNLIKELY(!*dstp))
+    *dstp = New<VectorClock>();
+  return *dstp;
+}
+
+void VectorClock::Release(VectorClock** dstp) const {
+  VectorClock* dst = AllocClock(dstp);
+  dst->Acquire(this);
+}
+
+void VectorClock::ReleaseStore(VectorClock** dstp) const {
+  VectorClock* dst = AllocClock(dstp);
+  *dst = *this;
+}
+
+VectorClock& VectorClock::operator=(const VectorClock& other) {
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++)
+    clk_[i] = other.clk_[i];
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(clk_);
+  m128 const* __restrict vsrc = reinterpret_cast<m128 const*>(other.clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 s = _mm_load_si128(&vsrc[i]);
+    _mm_store_si128(&vdst[i], s);
+  }
+#endif
+  return *this;
+}
+
+void VectorClock::ReleaseStoreAcquire(VectorClock** dstp) {
+  VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    Epoch tmp = dst->clk_[i];
+    dst->clk_[i] = clk_[i];
+    clk_[i] = max(clk_[i], tmp);
+  }
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 t = _mm_load_si128(&vdst[i]);
+    m128 c = _mm_load_si128(&vclk[i]);
+    m128 m = _mm_max_epu16(c, t);
+    _mm_store_si128(&vdst[i], c);
+    _mm_store_si128(&vclk[i], m);
+  }
+#endif
+}
+
+void VectorClock::ReleaseAcquire(VectorClock** dstp) {
+  VectorClock* dst = AllocClock(dstp);
+#if !TSAN_VECTORIZE
+  for (uptr i = 0; i < kThreadSlotCount; i++) {
+    dst->clk_[i] = max(dst->clk_[i], clk_[i]);
+    clk_[i] = dst->clk_[i];
+  }
+#else
+  m128* __restrict vdst = reinterpret_cast<m128*>(dst->clk_);
+  m128* __restrict vclk = reinterpret_cast<m128*>(clk_);
+  for (uptr i = 0; i < kVectorClockSize; i++) {
+    m128 c = _mm_load_si128(&vclk[i]);
+    m128 d = _mm_load_si128(&vdst[i]);
+    m128 m = _mm_max_epu16(c, d);
+    _mm_store_si128(&vdst[i], m);
+    _mm_store_si128(&vclk[i], m);
+  }
+#endif
+}
+
+}  // namespace __tsan

diff  --git a/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h b/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h
new file mode 100644
index 0000000000000..63b206302190d
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl-old/tsan_vector_clock.h
@@ -0,0 +1,51 @@
+//===-- tsan_vector_clock.h -------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_VECTOR_CLOCK_H
+#define TSAN_VECTOR_CLOCK_H
+
+#include "tsan_defs.h"
+
+namespace __tsan {
+
+// Fixed-size vector clock, used both for threads and sync objects.
+class VectorClock {
+ public:
+  VectorClock();
+
+  Epoch Get(Sid sid) const;
+  void Set(Sid sid, Epoch v);
+
+  void Reset();
+  void Acquire(const VectorClock* src);
+  void Release(VectorClock** dstp) const;
+  void ReleaseStore(VectorClock** dstp) const;
+  void ReleaseStoreAcquire(VectorClock** dstp);
+  void ReleaseAcquire(VectorClock** dstp);
+
+  VectorClock& operator=(const VectorClock& other);
+
+ private:
+  Epoch clk_[kThreadSlotCount] VECTOR_ALIGNED;
+};
+
+ALWAYS_INLINE Epoch VectorClock::Get(Sid sid) const {
+  return clk_[static_cast<u8>(sid)];
+}
+
+ALWAYS_INLINE void VectorClock::Set(Sid sid, Epoch v) {
+  DCHECK_GE(v, clk_[static_cast<u8>(sid)]);
+  clk_[static_cast<u8>(sid)] = v;
+}
+
+}  // namespace __tsan
+
+#endif  // TSAN_VECTOR_CLOCK_H


        


More information about the llvm-commits mailing list