[Openmp-commits] [llvm] [compiler-rt] [openmp] [TSan] Add instrumentation of AVX2 and AVX512 instructions (PR #74636)

via Openmp-commits openmp-commits at lists.llvm.org
Wed Dec 6 10:11:44 PST 2023


https://github.com/jprotze created https://github.com/llvm/llvm-project/pull/74636

Currently, ThreadSanitizer only instruments memory accesses up to a width of 128 bits and explicitly skips instrumentation of wider memory accesses. This means that TSan is blind to AVX2 and AVX512 memory instructions.

This patch adds instrumentation and runtime support for 256-bit and 512-bit memory loads/stores. Additionally, vector gather/scatter instructions are considered for instrumentation. These instructions gather individual data elements from memory into a single vector register and scatter the elements of a vector register to individual memory locations.
Since the addresses are passed as a 256-bit / 512-bit vector, the new interface functions are compiled separately with the required compiler flags (`-mavx2` / `-mavx512f`). This prevents AVX instructions from being introduced into other parts of the runtime. Since the new interface is only called on architectures that actually support AVX instructions, this separation maintains the portability of the runtime.

Some of the tests use `#pragma omp simd` as a portable way to generate vector instructions across architectures. The construct is independent of the OpenMP runtime. Therefore, the tests use base-language (pthread) threading.
Some of the tests call the new runtime functions directly, because we found no way to make the compiler generate scatter/gather instructions with a mask other than 0xFF.

>From 50911253f4df8ea88b42535c329a5fa6ed34e844 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Wed, 29 Nov 2023 16:32:44 +0100
Subject: [PATCH 1/2] Add simd support to tsan

---
 compiler-rt/cmake/Modules/AddCompilerRT.cmake |  4 +-
 compiler-rt/cmake/config-ix.cmake             |  2 +
 compiler-rt/lib/tsan/rtl/CMakeLists.txt       | 15 +++
 compiler-rt/lib/tsan/rtl/tsan_interface.cpp   | 37 ++++++--
 compiler-rt/lib/tsan/rtl/tsan_interface.h     |  4 +
 compiler-rt/lib/tsan/rtl/tsan_interface.inc   | 27 ++++++
 .../lib/tsan/rtl/tsan_interface_avx2.cpp      | 37 ++++++++
 .../lib/tsan/rtl/tsan_interface_avx2.h        | 46 ++++++++++
 .../lib/tsan/rtl/tsan_interface_avx512.cpp    | 43 +++++++++
 .../lib/tsan/rtl/tsan_interface_avx512.h      | 46 ++++++++++
 compiler-rt/test/tsan/simd_broadcast_norace.c | 45 +++++++++
 compiler-rt/test/tsan/simd_broadcast_race.c   | 43 +++++++++
 compiler-rt/test/tsan/simd_gather_race.c      | 44 +++++++++
 .../test/tsan/simd_gatherscatter_norace.c     | 45 +++++++++
 compiler-rt/test/tsan/simd_loadstore_norace.c | 45 +++++++++
 compiler-rt/test/tsan/simd_loadstore_race.c   | 44 +++++++++
 .../test/tsan/simd_scatter_mask_norace.c      | 56 ++++++++++++
 .../test/tsan/simd_scatter_mask_race.c        | 55 +++++++++++
 compiler-rt/test/tsan/simd_scatter_race.c     | 44 +++++++++
 .../Instrumentation/ThreadSanitizer.cpp       | 91 ++++++++++++++++---
 openmp/tools/archer/tests/lit.cfg             |  2 +-
 .../archer/tests/simd/simd-broadcast-no.c     | 44 +++++++++
 .../archer/tests/simd/simd-broadcast-yes.c    | 55 +++++++++++
 .../tools/archer/tests/simd/simd-gather-yes.c | 63 +++++++++++++
 .../archer/tests/simd/simd-gatherscatter-no.c | 46 ++++++++++
 .../archer/tests/simd/simd-loadstore-no.c     | 46 ++++++++++
 .../archer/tests/simd/simd-loadstore-yes.c    | 57 ++++++++++++
 .../archer/tests/simd/simd-scatter-yes.c      | 63 +++++++++++++
 28 files changed, 1127 insertions(+), 22 deletions(-)
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
 create mode 100644 compiler-rt/test/tsan/simd_broadcast_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_broadcast_race.c
 create mode 100644 compiler-rt/test/tsan/simd_gather_race.c
 create mode 100644 compiler-rt/test/tsan/simd_gatherscatter_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_loadstore_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_loadstore_race.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_mask_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_mask_race.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_race.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-broadcast-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-broadcast-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-gather-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-loadstore-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-loadstore-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-scatter-yes.c

diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 4d9b68a3cc25b..d85d649619032 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -385,8 +385,8 @@ function(add_compiler_rt_runtime name type)
       target_link_libraries(${libname} PRIVATE ${builtins_${libname}})
     endif()
     if(${type} STREQUAL "SHARED")
-      if(APPLE OR WIN32)
-        set_property(TARGET ${libname} PROPERTY BUILD_WITH_INSTALL_RPATH ON)
+      if(COMMAND llvm_setup_rpath)
+        llvm_setup_rpath(${libname})
       endif()
       if(WIN32 AND NOT CYGWIN AND NOT MINGW)
         set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index a8e078f1ebc98..ab6200fce2455 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -100,6 +100,8 @@ check_cxx_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR
 check_cxx_compiler_flag(-fno-coverage-mapping COMPILER_RT_HAS_FNO_COVERAGE_MAPPING_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc32"    COMPILER_RT_HAS_MCRC32_FLAG)
 check_cxx_compiler_flag("-Werror -msse4.2"   COMPILER_RT_HAS_MSSE4_2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx2"   COMPILER_RT_HAS_MAVX2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx512f"   COMPILER_RT_HAS_MAVX512F_FLAG)
 check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc"      COMPILER_RT_HAS_MCRC_FLAG)
 check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
index 791c0596f65ab..4df1a6c8fca89 100644
--- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
@@ -241,6 +241,17 @@ else()
     else()
       set(TSAN_ASM_SOURCES)
     endif()
+    # Separate object libraries keep -mavx2/-mavx512f confined to these sources.
+    add_compiler_rt_object_libraries(RTTSanAVX2
+        ARCHS ${arch}
+        SOURCES tsan_interface_avx2.cpp
+        ADDITIONAL_HEADERS tsan_interface_avx2.h
+        CFLAGS ${TSAN_RTL_CFLAGS} $<$<BOOL:${COMPILER_RT_HAS_MAVX2_FLAG}>:-mavx2>)
+    add_compiler_rt_object_libraries(RTTSanAVX512
+        ARCHS ${arch}
+        SOURCES tsan_interface_avx512.cpp
+        ADDITIONAL_HEADERS tsan_interface_avx512.h
+        CFLAGS ${TSAN_RTL_CFLAGS} $<$<BOOL:${COMPILER_RT_HAS_MAVX512F_FLAG}>:-mavx512f>)
     add_compiler_rt_runtime(clang_rt.tsan
       STATIC
       ARCHS ${arch}
@@ -252,6 +263,8 @@ else()
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
               $<TARGET_OBJECTS:RTUbsan.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
       ADDITIONAL_HEADERS ${TSAN_HEADERS}
       CFLAGS ${TSAN_RTL_CFLAGS}
       PARENT_TARGET tsan)
@@ -276,6 +289,8 @@ else()
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
               $<TARGET_OBJECTS:RTUbsan.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
       ADDITIONAL_HEADERS ${TSAN_HEADERS}
       CFLAGS ${TSAN_RTL_DYNAMIC_CFLAGS}
       DEFS SANITIZER_SHARED
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
index e6c4bf2e60a7b..c97cf62e2e9bd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
@@ -13,6 +13,7 @@
 #include "tsan_interface.h"
 #include "tsan_interface_ann.h"
 #include "tsan_rtl.h"
+
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_ptrauth.h"
 
@@ -42,18 +43,47 @@ void __tsan_write16_pc(void *addr, void *pc) {
 
 // __tsan_unaligned_read/write calls are emitted by compiler.
 
-void __tsan_unaligned_read16(const void *addr) {
+// Reports an N-byte unaligned read as N / 8 consecutive 8-byte accesses.
+// ALWAYS_INLINE keeps CALLERPC pointing at the caller of the exported wrapper
+// instead of at the wrapper itself.
+template <unsigned int N>
+ALWAYS_INLINE void __tsan_unaligned_readx(const void *addr) {
   uptr pc = CALLERPC;
   ThreadState *thr = cur_thread();
-  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
-  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
+  for (unsigned int i = 0; i < N / 8; i++)
+    UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessRead);
 }
 
-void __tsan_unaligned_write16(void *addr) {
+// Reports an N-byte unaligned write as N / 8 consecutive 8-byte accesses.
+// ALWAYS_INLINE for the same CALLERPC reason as __tsan_unaligned_readx.
+template <unsigned int N>
+ALWAYS_INLINE void __tsan_unaligned_writex(void *addr) {
   uptr pc = CALLERPC;
   ThreadState *thr = cur_thread();
-  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
-  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
+  for (unsigned int i = 0; i < N / 8; i++)
+    UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessWrite);
+}
+
+void __tsan_unaligned_read16(const void *addr) {
+  __tsan_unaligned_readx<16>(addr);
+}
+
+void __tsan_unaligned_write16(void *addr) { __tsan_unaligned_writex<16>(addr); }
+
+extern "C" void __tsan_unaligned_read32(const void *addr) {
+  __tsan_unaligned_readx<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_write32(void *addr) {
+  __tsan_unaligned_writex<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_read64(const void *addr) {
+  __tsan_unaligned_readx<64>(addr);
+}
+
+extern "C" void __tsan_unaligned_write64(void *addr) {
+  __tsan_unaligned_writex<64>(addr);
 }
 
 extern "C" {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h
index 3731c90d45915..ec24aaa9578d7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h
@@ -53,11 +53,15 @@ SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read2(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read4(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read8(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read16(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read32(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read64(const void *addr);
 
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write2(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write4(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write8(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write16(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write32(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write64(void *addr);
 
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1_pc(void *addr, void *pc);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2_pc(void *addr, void *pc);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.inc b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
index b0a424ff9c255..b7894e167db9e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
@@ -38,6 +38,18 @@ void __tsan_read16(void *addr) {
   MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
 }
 
+extern "C" void __tsan_read32(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+}
+
+extern "C" void __tsan_read64(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessRead);
+}
+
 void __tsan_write1(void *addr) {
   MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
 }
@@ -58,6 +70,21 @@ void __tsan_write16(void *addr) {
   MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
 }
 
+extern "C" void __tsan_write32(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+}
+
+extern "C" void __tsan_write64(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessWrite);
+}
+
+// Our vector instructions
+// TODO
+
 void __tsan_read1_pc(void *addr, void *pc) {
   MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
 }
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
new file mode 100644
index 0000000000000..cc50afd383d5b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
@@ -0,0 +1,48 @@
+// Gather/scatter instrumentation entry points taking four 64-bit addresses
+// packed in a __m256i. All content, including the <immintrin.h> include, is
+// guarded by __AVX__ so the file is an empty translation unit when AVX is not
+// enabled for the target.
+#ifdef __AVX__
+#  include "tsan_interface_avx2.h"
+
+#  include <immintrin.h>
+#  include <inttypes.h>
+#  include <stdint.h>
+#  include <unistd.h>
+
+#  include "sanitizer_common/sanitizer_internal_defs.h"
+#  include "sanitizer_common/sanitizer_ptrauth.h"
+#  include "tsan_interface_ann.h"
+#  include "tsan_rtl.h"
+
+#  define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+// Reports a `size`-byte write at every address in `vaddr` whose lane bit is
+// set in `mask` (bit i selects lane i).
+extern "C" void __tsan_scatter_vector4(__m256i vaddr, int size, uint8_t mask) {
+  void *addr[4] = {};
+  // `addr` has only pointer alignment, so use the unaligned store; the aligned
+  // form requires a 32-byte aligned destination.
+  _mm256_storeu_si256((__m256i *)addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 4; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+// Reports a `size`-byte read at every address in `vaddr` whose lane bit is set
+// in `mask` (bit i selects lane i).
+extern "C" void __tsan_gather_vector4(__m256i vaddr, int size, uint8_t mask) {
+  void *addr[4] = {};
+  // Unaligned store for the same reason as in __tsan_scatter_vector4.
+  _mm256_storeu_si256((__m256i *)addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 4; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
new file mode 100644
index 0000000000000..84c001be8855b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx2.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX2_H
+#define TSAN_INTERFACE_AVX2_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// Only the includes above are needed here; do not add further headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+#  ifdef __AVX__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector4(__m256i vaddr,
+                                                          int width,
+                                                          uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector4(__m256i vaddr,
+                                                         int width,
+                                                         uint8_t mask);
+#  endif /*__AVX__*/
+#endif   // SANITIZER_GO
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX2_H*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
new file mode 100644
index 0000000000000..ab8fbf2af3a76
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
@@ -0,0 +1,48 @@
+// Gather/scatter instrumentation entry points taking eight 64-bit addresses
+// packed in a __m512i. All content, including the <immintrin.h> include, is
+// guarded by __AVX512F__ so the file is an empty translation unit when
+// AVX-512F is not enabled for the target.
+#ifdef __AVX512F__
+#  include "tsan_interface_avx512.h"
+
+#  include <immintrin.h>
+#  include <inttypes.h>
+#  include <stdint.h>
+#  include <unistd.h>
+
+#  include "sanitizer_common/sanitizer_internal_defs.h"
+#  include "sanitizer_common/sanitizer_ptrauth.h"
+#  include "tsan_interface_ann.h"
+#  include "tsan_rtl.h"
+
+#  define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+// Reports a `size`-byte write at every address in `vaddr` whose lane bit is
+// set in `mask` (bit i selects lane i).
+extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
+  void *addr[8] = {};
+  // `addr` has only pointer alignment, so spill all eight lanes with a single
+  // unaligned 512-bit store.
+  _mm512_storeu_si512(addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 8; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+// Reports a `size`-byte read at every address in `vaddr` whose lane bit is set
+// in `mask` (bit i selects lane i).
+extern "C" void __tsan_gather_vector8(__m512i vaddr, int size, uint8_t mask) {
+  void *addr[8] = {};
+  // Unaligned store for the same reason as in __tsan_scatter_vector8.
+  _mm512_storeu_si512(addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 8; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX512F__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
new file mode 100644
index 0000000000000..179f64a89a9f1
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx512.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX512_H
+#define TSAN_INTERFACE_AVX512_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// Only the includes above are needed here; do not add further headers.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+#  ifdef __AVX512F__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector8(__m512i vaddr,
+                                                          int width,
+                                                          uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector8(__m512i vaddr,
+                                                         int width,
+                                                         uint8_t mask);
+#  endif /*__AVX512F__*/
+#endif   // SANITIZER_GO
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX512_H*/
diff --git a/compiler-rt/test/tsan/simd_broadcast_norace.c b/compiler-rt/test/tsan/simd_broadcast_norace.c
new file mode 100644
index 0000000000000..3a7c2cfe279dc
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE c;
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += c;
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_broadcast_race.c b/compiler-rt/test/tsan/simd_broadcast_race.c
new file mode 100644
index 0000000000000..08d6207ede722
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_race.c
@@ -0,0 +1,43 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += A[64];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gather_race.c b/compiler-rt/test/tsan/simd_gather_race.c
new file mode 100644
index 0000000000000..1d7c68a0bc93e
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gather_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j + CHUNK_SIZE] = A[j * 2] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gatherscatter_norace.c b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
new file mode 100644
index 0000000000000..3f5994119223c
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j * 2] += B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_loadstore_norace.c b/compiler-rt/test/tsan/simd_loadstore_norace.c
new file mode 100644
index 0000000000000..ed3421d026a8e
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_loadstore_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_loadstore_race.c b/compiler-rt/test/tsan/simd_loadstore_race.c
new file mode 100644
index 0000000000000..4525404d1b8e5
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_loadstore_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j + 64] = A[j] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_norace.c b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
new file mode 100644
index 0000000000000..1526e3c9e05e5
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
@@ -0,0 +1,56 @@
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=double %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+
+#if SIMDLEN == 4
+#  define tsan_scatter_func __tsan_scatter_vector4
+#  define intri_type __m256i
+#elif SIMDLEN == 8
+#  define tsan_scatter_func __tsan_scatter_vector8
+#  define intri_type __m512i
+#endif
+
+extern void tsan_scatter_func(intri_type, int, uint8_t);
+TYPE A[8];
+
+__attribute__((disable_sanitizer_instrumentation)) void *Thread(uint8_t mask) {
+#if SIMDLEN == 4
+  __m256i vaddr = _mm256_set_epi64x(
+#elif SIMDLEN == 8
+  __m512i vaddr = _mm512_set_epi64(
+      (int64_t)(A + 7), (int64_t)(A + 6), (int64_t)(A + 5), (int64_t)(A + 4),
+#endif
+      (int64_t)(A + 3), (int64_t)(A + 2), (int64_t)(A + 1), (int64_t)(A + 0));
+  tsan_scatter_func(vaddr, sizeof(TYPE), mask);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0b01010101); }
+
+void *Thread2(void *x) { return Thread(0b10101010); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE.\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_race.c b/compiler-rt/test/tsan/simd_scatter_mask_race.c
new file mode 100644
index 0000000000000..8bf634494028a
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_mask_race.c
@@ -0,0 +1,55 @@
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=double %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+
+#if SIMDLEN == 4
+#  define tsan_scatter_func __tsan_scatter_vector4
+#  define intri_type __m256i
+#elif SIMDLEN == 8
+#  define tsan_scatter_func __tsan_scatter_vector8
+#  define intri_type __m512i
+#endif
+
+extern void tsan_scatter_func(intri_type, int, uint8_t);
+TYPE A[8];
+
+__attribute__((disable_sanitizer_instrumentation)) void *Thread(uint8_t mask) {
+#if SIMDLEN == 4
+  __m256i vaddr = _mm256_set_epi64x(
+#elif SIMDLEN == 8
+  __m512i vaddr = _mm512_set_epi64(
+      (int64_t)(A + 7), (int64_t)(A + 6), (int64_t)(A + 5), (int64_t)(A + 4),
+#endif
+      (int64_t)(A + 3), (int64_t)(A + 2), (int64_t)(A + 1), (int64_t)(A + 0));
+  tsan_scatter_func(vaddr, sizeof(TYPE), mask);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0b01010101); }
+
+void *Thread2(void *x) { return Thread(0b10101011); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_race.c b/compiler-rt/test/tsan/simd_scatter_race.c
new file mode 100644
index 0000000000000..84f9f4d51cab9
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 2048 // multiple of 2 * CHUNK_SIZE: no chunk runs past A or B
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j * 2] = A[j + CHUNK_SIZE] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 8ee0bca7e354f..78b4f8edd3468 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -67,6 +67,9 @@ static cl::opt<bool> ClInstrumentAtomics("tsan-instrument-atomics",
                                          cl::init(true),
                                          cl::desc("Instrument atomics"),
                                          cl::Hidden);
+static cl::opt<bool> ClInstrumentSimd("tsan-instrument-simd", cl::init(true),
+                                      cl::desc("Instrument simd instructions"),
+                                      cl::Hidden);
 static cl::opt<bool> ClInstrumentMemIntrinsics(
     "tsan-instrument-memintrinsics", cl::init(true),
     cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
@@ -142,6 +145,7 @@ struct ThreadSanitizer {
   bool addrPointsToConstantData(Value *Addr);
   int getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, const DataLayout &DL);
   void InsertRuntimeIgnores(Function &F);
+  bool instrumentGatherOrScatter(Instruction *I, const DataLayout &DL);
 
   Type *IntptrTy;
   FunctionCallee TsanFuncEntry;
@@ -149,7 +153,7 @@ struct ThreadSanitizer {
   FunctionCallee TsanIgnoreBegin;
   FunctionCallee TsanIgnoreEnd;
   // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
-  static const size_t kNumberOfAccessSizes = 5;
+  static const size_t kNumberOfAccessSizes = 9;
   FunctionCallee TsanRead[kNumberOfAccessSizes];
   FunctionCallee TsanWrite[kNumberOfAccessSizes];
   FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
@@ -170,6 +174,8 @@ struct ThreadSanitizer {
   FunctionCallee TsanVptrUpdate;
   FunctionCallee TsanVptrLoad;
   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+  FunctionCallee TsanVectorScatter[2];
+  FunctionCallee TsanVectorGather[2];
 };
 
 void insertModuleCtor(Module &M) {
@@ -213,6 +219,26 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
   TsanIgnoreEnd =
       M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
   IntegerType *OrdTy = IRB.getInt32Ty();
+
+  TsanVectorScatter[0] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_scatter_vector4"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  // Same (addresses, bytes per element, lane mask) signature as the other
+  // three entry points; instrumentGatherOrScatter passes exactly three args.
+  TsanVectorScatter[1] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_scatter_vector8"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  TsanVectorGather[0] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_gather_vector4"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  TsanVectorGather[1] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_gather_vector8"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+
   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
     const unsigned ByteSize = 1U << i;
     const unsigned BitSize = ByteSize * 8;
@@ -506,30 +532,40 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
 
   initialize(*F.getParent(), TLI);
   SmallVector<InstructionInfo, 8> AllLoadsAndStores;
-  SmallVector<Instruction*, 8> LocalLoadsAndStores;
-  SmallVector<Instruction*, 8> AtomicAccesses;
-  SmallVector<Instruction*, 8> MemIntrinCalls;
+  SmallVector<Instruction *, 8> LocalLoadsAndStores;
+  SmallVector<Instruction *, 8> AtomicAccesses;
+  SmallVector<Instruction *, 8> MemIntrinCalls;
+  SmallVector<Instruction *, 8> AllGathersAndScatters;
+
   bool Res = false;
   bool HasCalls = false;
   bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
   const DataLayout &DL = F.getParent()->getDataLayout();
 
-  // Traverse all instructions, collect loads/stores/returns, check for calls.
+  // Traverse all instructions, collect loads/stores/returns/gathers/scatters,
+  // check for calls.
   for (auto &BB : F) {
     for (auto &Inst : BB) {
       // Skip instructions inserted by another instrumentation.
       if (Inst.hasMetadata(LLVMContext::MD_nosanitize))
         continue;
-      if (isTsanAtomic(&Inst))
+      if (isTsanAtomic(&Inst)) {
         AtomicAccesses.push_back(&Inst);
-      else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+      } else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
         LocalLoadsAndStores.push_back(&Inst);
-      else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
-               isa<InvokeInst>(Inst)) {
-        if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+      } else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
+                 isa<InvokeInst>(Inst)) {
+        if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+          auto CFunc = CI->getCalledFunction();
+          if (CFunc && (CFunc->getName().contains("llvm.masked.scatter") ||
+                        CFunc->getName().contains("llvm.masked.gather"))) {
+            AllGathersAndScatters.push_back(&Inst);
+          }
           maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
-        if (isa<MemIntrinsic>(Inst))
+        }
+        if (isa<MemIntrinsic>(Inst)) {
           MemIntrinCalls.push_back(&Inst);
+        }
         HasCalls = true;
         chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
                                        DL);
@@ -548,6 +584,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
       Res |= instrumentLoadOrStore(II, DL);
     }
 
+  // Instrument gather and scatter memory accesses
+  if (ClInstrumentSimd && ClInstrumentMemoryAccesses && SanitizeFunction)
+    for (const auto &II : AllGathersAndScatters) {
+      Res |= instrumentGatherOrScatter(II, DL);
+    }
+
   // Instrument atomic memory accesses in any case (they can be used to
   // implement synchronization).
   if (ClInstrumentAtomics)
@@ -658,6 +700,29 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
   return true;
 }
 
+bool ThreadSanitizer::instrumentGatherOrScatter(Instruction *I,
+                                                const DataLayout &DL) {
+  // scatter(val, ptrs, align, mask) / gather(ptrs, align, mask, passthru)
+  auto *CI = cast<CallInst>(I);
+  bool IsScatter = CI->getCalledFunction()->getName().contains("scatter");
+  auto *VecTy = dyn_cast<FixedVectorType>(
+      CI->getArgOperand(IsScatter ? 0 : 3)->getType());
+  unsigned N = VecTy ? VecTy->getNumElements() : 0; // lane count
+  if (N != 4 && N != 8)
+    return false; // Only 4- and 8-lane runtime entry points are declared.
+  FunctionCallee Fn = (IsScatter ? TsanVectorScatter : TsanVectorGather)[N / 8];
+  InstrumentationIRBuilder IRB(I);
+  Value *Addrs = IRB.CreatePtrToInt(CI->getArgOperand(IsScatter ? 1 : 0),
+                                    Fn.getFunctionType()->getParamType(0));
+  // <N x i1> is only N bits wide: bitcast to iN, then widen to the i8 mask.
+  Value *Mask = IRB.CreateBitCast(CI->getArgOperand(IsScatter ? 3 : 2),
+                                  IRB.getIntNTy(N));
+  Mask = IRB.CreateZExtOrTrunc(Mask, IRB.getInt8Ty());
+  unsigned BytesPerElement = DL.getTypeSizeInBits(VecTy) / N / 8;
+  IRB.CreateCall(Fn, {Addrs, IRB.getInt32(BytesPerElement), Mask});
+  return true;
+}
+
 static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
   uint32_t v = 0;
   switch (ord) {
@@ -804,8 +869,8 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr,
                                               const DataLayout &DL) {
   assert(OrigTy->isSized());
   uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
-  if (TypeSize != 8  && TypeSize != 16 &&
-      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+  if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 &&
+      TypeSize != 128 && TypeSize != 256 && TypeSize != 512) {
     NumAccessesWithBadSize++;
     // Ignore all unusual sizes.
     return -1;
diff --git a/openmp/tools/archer/tests/lit.cfg b/openmp/tools/archer/tests/lit.cfg
index 692cbfe97cf1e..cd19f9e95f700 100644
--- a/openmp/tools/archer/tests/lit.cfg
+++ b/openmp/tools/archer/tests/lit.cfg
@@ -115,7 +115,7 @@ config.substitutions.append(("%libarcher-cxx-compile-and-run", \
 config.substitutions.append(("%libarcher-cxx-compile", \
     "%clang-archerXX %openmp_flags %archer_flags %flags -std=c++17 %s -o %t" + libs))
 config.substitutions.append(("%libarcher-compile", \
-                             "%clang-archer %openmp_flags %archer_flags %flags %s -o %t" + libs))
+                             "%clang-archer %openmp_flags %archer_flags %flags -march=native %s -o %t" + libs))
 config.substitutions.append(("%libarcher-run-race", "%suppression %deflake %t 2>&1 | tee %t.log"))
 config.substitutions.append(("%libarcher-run-nosuppression", "%nosuppression %t 2>&1 | tee %t.log"))
 config.substitutions.append(("%libarcher-run", "%suppression %t 2>&1 | tee %t.log"))
diff --git a/openmp/tools/archer/tests/simd/simd-broadcast-no.c b/openmp/tools/archer/tests/simd/simd-broadcast-no.c
new file mode 100644
index 0000000000000..41bf837e8e478
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-broadcast-no.c
@@ -0,0 +1,44 @@
+/*
+ * simd-broadcast-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+  if (argc > 1)
+    len = atoi(argv[1]);
+  double a[len];
+  for (int i = 0; i < len; i++)
+    a[i] = i;
+  TYPE c = M_PI;
+
+#pragma omp parallel for simd num_threads(2) schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + c;
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-broadcast-yes.c b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
new file mode 100644
index 0000000000000..23a0f545092ab
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
@@ -0,0 +1,55 @@
+/*
+ * simd-broadcast-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s 
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s 
+// REQUIRES: tsan
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[len];
+  for (int i = 0; i < len; i++)
+    a[i] = i;
+  double c = M_PI;
+
+#pragma omp parallel for simd num_threads(2) schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + a[64];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-broadcast-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-broadcast-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-broadcast-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-broadcast-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-gather-yes.c b/openmp/tools/archer/tests/simd/simd-gather-yes.c
new file mode 100644
index 0000000000000..881ab6f17dabc
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-gather-yes.c
@@ -0,0 +1,63 @@
+/*
+ * simd-gather-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+#ifndef SIMDLEN
+#define SIMDLEN 8
+#endif /*SIMDLEN*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64) simdlen(SIMDLEN)
+  for (int i = 0; i < len; i++)
+    a[i + 64] = a[i * 2] + b[i];
+
+  printf("DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-gather-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-gather-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-gather-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-gather-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c b/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
new file mode 100644
index 0000000000000..9c4d659ea2617
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
@@ -0,0 +1,46 @@
+/*
+ * simd-gatherscatter-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i * 2] = a[i * 2] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-loadstore-no.c b/openmp/tools/archer/tests/simd/simd-loadstore-no.c
new file mode 100644
index 0000000000000..1471f65639d77
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-loadstore-no.c
@@ -0,0 +1,46 @@
+/*
+ * simd-loadstore-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[len], b[len];
+
+  for (int i = 0; i < len; i++) {
+    a[i] = i;
+    b[i] = i + 1;
+  }
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-loadstore-yes.c b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
new file mode 100644
index 0000000000000..d122a0bda2afa
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
@@ -0,0 +1,57 @@
+/*
+ * simd-loadstore-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  double a[len], b[len];
+
+  for (int i = 0; i < len; i++) {
+    a[i] = i;
+    b[i] = i + 1;
+  }
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len - 64; i++)
+    a[i + 64] = a[i] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-loadstore-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-loadstore-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-loadstore-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-loadstore-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-scatter-yes.c b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
new file mode 100644
index 0000000000000..fcf3178381572
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
@@ -0,0 +1,63 @@
+/*
+ * simd-scatter-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+#ifndef SIMDLEN
+#define SIMDLEN 8
+#endif /*SIMDLEN*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64) simdlen(SIMDLEN)
+  for (int i = 0; i < len; i++)
+    a[i * 2] = a[i + 64] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-scatter-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-scatter-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-scatter-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-scatter-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings

>From e54345701a10dc8b161eb9353e2fd7dc86bbc44f Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Wed, 6 Dec 2023 19:01:57 +0100
Subject: [PATCH 2/2] Revert unwanted changes

---
 compiler-rt/cmake/Modules/AddCompilerRT.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index d85d649619032..4d9b68a3cc25b 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -385,8 +385,8 @@ function(add_compiler_rt_runtime name type)
       target_link_libraries(${libname} PRIVATE ${builtins_${libname}})
     endif()
     if(${type} STREQUAL "SHARED")
-      if(COMMAND llvm_setup_rpath)
-        llvm_setup_rpath(${libname})
+      if(APPLE OR WIN32)
+        set_property(TARGET ${libname} PROPERTY BUILD_WITH_INSTALL_RPATH ON)
       endif()
       if(WIN32 AND NOT CYGWIN AND NOT MINGW)
         set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")



More information about the Openmp-commits mailing list