[Openmp-commits] [compiler-rt] [llvm] [openmp] [TSan] Add instrumentation of AVX2 and AVX512 instructions (PR #74636)

via Openmp-commits openmp-commits at lists.llvm.org
Mon Dec 11 23:30:31 PST 2023


https://github.com/felilxtomski updated https://github.com/llvm/llvm-project/pull/74636

>From 50911253f4df8ea88b42535c329a5fa6ed34e844 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Wed, 29 Nov 2023 16:32:44 +0100
Subject: [PATCH 01/14] Add simd support to tsan

---
 compiler-rt/cmake/Modules/AddCompilerRT.cmake |  4 +-
 compiler-rt/cmake/config-ix.cmake             |  2 +
 compiler-rt/lib/tsan/rtl/CMakeLists.txt       | 15 +++
 compiler-rt/lib/tsan/rtl/tsan_interface.cpp   | 37 ++++++--
 compiler-rt/lib/tsan/rtl/tsan_interface.h     |  4 +
 compiler-rt/lib/tsan/rtl/tsan_interface.inc   | 27 ++++++
 .../lib/tsan/rtl/tsan_interface_avx2.cpp      | 37 ++++++++
 .../lib/tsan/rtl/tsan_interface_avx2.h        | 46 ++++++++++
 .../lib/tsan/rtl/tsan_interface_avx512.cpp    | 43 +++++++++
 .../lib/tsan/rtl/tsan_interface_avx512.h      | 46 ++++++++++
 compiler-rt/test/tsan/simd_broadcast_norace.c | 45 +++++++++
 compiler-rt/test/tsan/simd_broadcast_race.c   | 43 +++++++++
 compiler-rt/test/tsan/simd_gather_race.c      | 44 +++++++++
 .../test/tsan/simd_gatherscatter_norace.c     | 45 +++++++++
 compiler-rt/test/tsan/simd_loadstore_norace.c | 45 +++++++++
 compiler-rt/test/tsan/simd_loadstore_race.c   | 44 +++++++++
 .../test/tsan/simd_scatter_mask_norace.c      | 56 ++++++++++++
 .../test/tsan/simd_scatter_mask_race.c        | 55 +++++++++++
 compiler-rt/test/tsan/simd_scatter_race.c     | 44 +++++++++
 .../Instrumentation/ThreadSanitizer.cpp       | 91 ++++++++++++++++---
 openmp/tools/archer/tests/lit.cfg             |  2 +-
 .../archer/tests/simd/simd-broadcast-no.c     | 44 +++++++++
 .../archer/tests/simd/simd-broadcast-yes.c    | 55 +++++++++++
 .../tools/archer/tests/simd/simd-gather-yes.c | 63 +++++++++++++
 .../archer/tests/simd/simd-gatherscatter-no.c | 46 ++++++++++
 .../archer/tests/simd/simd-loadstore-no.c     | 46 ++++++++++
 .../archer/tests/simd/simd-loadstore-yes.c    | 57 ++++++++++++
 .../archer/tests/simd/simd-scatter-yes.c      | 63 +++++++++++++
 28 files changed, 1127 insertions(+), 22 deletions(-)
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
 create mode 100644 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
 create mode 100644 compiler-rt/test/tsan/simd_broadcast_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_broadcast_race.c
 create mode 100644 compiler-rt/test/tsan/simd_gather_race.c
 create mode 100644 compiler-rt/test/tsan/simd_gatherscatter_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_loadstore_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_loadstore_race.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_mask_norace.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_mask_race.c
 create mode 100644 compiler-rt/test/tsan/simd_scatter_race.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-broadcast-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-broadcast-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-gather-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-loadstore-no.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-loadstore-yes.c
 create mode 100644 openmp/tools/archer/tests/simd/simd-scatter-yes.c

diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index 4d9b68a3cc25b..d85d649619032 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -385,8 +385,8 @@ function(add_compiler_rt_runtime name type)
       target_link_libraries(${libname} PRIVATE ${builtins_${libname}})
     endif()
     if(${type} STREQUAL "SHARED")
-      if(APPLE OR WIN32)
-        set_property(TARGET ${libname} PROPERTY BUILD_WITH_INSTALL_RPATH ON)
+      if(COMMAND llvm_setup_rpath)
+        llvm_setup_rpath(${libname})
       endif()
       if(WIN32 AND NOT CYGWIN AND NOT MINGW)
         set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")
diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake
index a8e078f1ebc98..ab6200fce2455 100644
--- a/compiler-rt/cmake/config-ix.cmake
+++ b/compiler-rt/cmake/config-ix.cmake
@@ -100,6 +100,8 @@ check_cxx_compiler_flag(-fno-profile-instr-use COMPILER_RT_HAS_FNO_PROFILE_INSTR
 check_cxx_compiler_flag(-fno-coverage-mapping COMPILER_RT_HAS_FNO_COVERAGE_MAPPING_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc32"    COMPILER_RT_HAS_MCRC32_FLAG)
 check_cxx_compiler_flag("-Werror -msse4.2"   COMPILER_RT_HAS_MSSE4_2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx2"     COMPILER_RT_HAS_MAVX2_FLAG)
+check_cxx_compiler_flag("-Werror -mavx512f"  COMPILER_RT_HAS_MAVX512F_FLAG)
 check_cxx_compiler_flag(--sysroot=.          COMPILER_RT_HAS_SYSROOT_FLAG)
 check_cxx_compiler_flag("-Werror -mcrc"      COMPILER_RT_HAS_MCRC_FLAG)
 check_cxx_compiler_flag(-fno-partial-inlining COMPILER_RT_HAS_FNO_PARTIAL_INLINING_FLAG)
diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
index 791c0596f65ab..4df1a6c8fca89 100644
--- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
@@ -241,6 +241,17 @@ else()
     else()
       set(TSAN_ASM_SOURCES)
     endif()
+    # Build the SIMD entry points separately so only they get the ISA flag.
+    add_compiler_rt_object_libraries(RTTSanAVX2
+        ARCHS ${arch}
+        SOURCES tsan_interface_avx2.cpp
+        ADDITIONAL_HEADERS tsan_interface_avx2.h
+        CFLAGS ${TSAN_RTL_CFLAGS} $<$<BOOL:${COMPILER_RT_HAS_MAVX2_FLAG}>:-mavx2>)
+    add_compiler_rt_object_libraries(RTTSanAVX512
+        ARCHS ${arch}
+        SOURCES tsan_interface_avx512.cpp
+        ADDITIONAL_HEADERS tsan_interface_avx512.h
+        CFLAGS ${TSAN_RTL_CFLAGS} $<$<BOOL:${COMPILER_RT_HAS_MAVX512F_FLAG}>:-mavx512f>)
     add_compiler_rt_runtime(clang_rt.tsan
       STATIC
       ARCHS ${arch}
@@ -252,6 +263,8 @@ else()
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
               $<TARGET_OBJECTS:RTUbsan.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
       ADDITIONAL_HEADERS ${TSAN_HEADERS}
       CFLAGS ${TSAN_RTL_CFLAGS}
       PARENT_TARGET tsan)
@@ -276,6 +289,8 @@ else()
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizer.${arch}>
               $<TARGET_OBJECTS:RTSanitizerCommonSymbolizerInternal.${arch}>
               $<TARGET_OBJECTS:RTUbsan.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX2.${arch}>
+              $<TARGET_OBJECTS:RTTSanAVX512.${arch}>
       ADDITIONAL_HEADERS ${TSAN_HEADERS}
       CFLAGS ${TSAN_RTL_DYNAMIC_CFLAGS}
       DEFS SANITIZER_SHARED
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
index e6c4bf2e60a7b..c97cf62e2e9bd 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
@@ -13,6 +13,7 @@
 #include "tsan_interface.h"
 #include "tsan_interface_ann.h"
 #include "tsan_rtl.h"
+
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_ptrauth.h"
 
@@ -42,18 +43,47 @@ void __tsan_write16_pc(void *addr, void *pc) {
 
 // __tsan_unaligned_read/write calls are emitted by compiler.
 
-void __tsan_unaligned_read16(const void *addr) {
+// Reports an unaligned N-byte read at addr as N / 8 consecutive 8-byte reads.
+// Force-inlined so that CALLERPC is evaluated in the exported
+// __tsan_unaligned_read<N> entry point and names its caller.
+template <unsigned int N>
+ALWAYS_INLINE void __tsan_unaligned_readx(const void *addr) {
   uptr pc = CALLERPC;
   ThreadState *thr = cur_thread();
-  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessRead);
-  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessRead);
+  for (unsigned int i = 0; i < N / 8; i++)
+    UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessRead);
 }
 
-void __tsan_unaligned_write16(void *addr) {
+// Write counterpart of __tsan_unaligned_readx; force-inlined for the same
+// CALLERPC reason.
+template <unsigned int N>
+ALWAYS_INLINE void __tsan_unaligned_writex(void *addr) {
   uptr pc = CALLERPC;
   ThreadState *thr = cur_thread();
-  UnalignedMemoryAccess(thr, pc, (uptr)addr, 8, kAccessWrite);
-  UnalignedMemoryAccess(thr, pc, (uptr)addr + 8, 8, kAccessWrite);
+  for (unsigned int i = 0; i < N / 8; i++)
+    UnalignedMemoryAccess(thr, pc, (uptr)addr + (i * 8), 8, kAccessWrite);
+}
+
+void __tsan_unaligned_read16(const void *addr) {
+  __tsan_unaligned_readx<16>(addr);
+}
+
+void __tsan_unaligned_write16(void *addr) { __tsan_unaligned_writex<16>(addr); }
+
+extern "C" void __tsan_unaligned_read32(const void *addr) {
+  __tsan_unaligned_readx<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_write32(void *addr) {
+  __tsan_unaligned_writex<32>(addr);
+}
+
+extern "C" void __tsan_unaligned_read64(const void *addr) {
+  __tsan_unaligned_readx<64>(addr);
+}
+
+extern "C" void __tsan_unaligned_write64(void *addr) {
+  __tsan_unaligned_writex<64>(addr);
 }
 
 extern "C" {
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.h b/compiler-rt/lib/tsan/rtl/tsan_interface.h
index 3731c90d45915..ec24aaa9578d7 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.h
@@ -53,11 +53,15 @@ SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read2(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read4(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read8(const void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read16(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read32(const void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_read64(const void *addr);
 
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write2(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write4(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write8(void *addr);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write16(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write32(void *addr);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_unaligned_write64(void *addr);
 
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read1_pc(void *addr, void *pc);
 SANITIZER_INTERFACE_ATTRIBUTE void __tsan_read2_pc(void *addr, void *pc);
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.inc b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
index b0a424ff9c255..b7894e167db9e 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.inc
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.inc
@@ -38,6 +38,18 @@ void __tsan_read16(void *addr) {
   MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
 }
 
+extern "C" void __tsan_read32(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+}
+
+extern "C" void __tsan_read64(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessRead);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessRead);
+}
+
 void __tsan_write1(void *addr) {
   MemoryAccess(cur_thread(), CALLERPC, (uptr)addr, 1, kAccessWrite);
 }
@@ -58,6 +70,21 @@ void __tsan_write16(void *addr) {
   MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
 }
 
+// Wide vector writes: a 32- or 64-byte write is reported as two or four
+// consecutive 16-byte MemoryAccess16 calls, each tagged with kAccessWrite and
+// CALLERPC.
+extern "C" void __tsan_write32(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+}
+
+extern "C" void __tsan_write64(void *addr) {
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 16, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 32, kAccessWrite);
+  MemoryAccess16(cur_thread(), CALLERPC, (uptr)addr + 48, kAccessWrite);
+}
+
 void __tsan_read1_pc(void *addr, void *pc) {
   MemoryAccess(cur_thread(), STRIP_PAC_PC(pc), (uptr)addr, 1, kAccessRead | kAccessExternalPC);
 }
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
new file mode 100644
index 0000000000000..cc50afd383d5b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.cpp
@@ -0,0 +1,44 @@
+#include "tsan_interface_avx2.h"
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+#ifdef __AVX__
+// Reports a `size`-byte write at each of the four addresses packed in `vaddr`
+// whose mask bit is set (bit i of `mask` selects lane i).
+extern "C" void __tsan_scatter_vector4(__m256i vaddr, int size, uint8_t mask) {
+  void *addr[4] = {};
+  // addr[] is not guaranteed to be 32-byte aligned, which the aligned
+  // _mm256_store_si256 requires, so spill the lanes with the unaligned form.
+  _mm256_storeu_si256((__m256i *)addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 4; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+// Read counterpart of __tsan_scatter_vector4: reports a `size`-byte read for
+// every address in `vaddr` whose lane is selected by `mask`.
+extern "C" void __tsan_gather_vector4(__m256i vaddr, int size, uint8_t mask) {
+  void *addr[4] = {};
+  // Unaligned store for the same reason as in __tsan_scatter_vector4.
+  _mm256_storeu_si256((__m256i *)addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 4; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
new file mode 100644
index 0000000000000..84c001be8855b
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx2.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX2_H
+#define TSAN_INTERFACE_AVX2_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// This header should NOT include any headers besides the three above.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+#  ifdef __AVX__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector4(__m256i vaddr,
+                                                          int width,
+                                                          uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector4(__m256i vaddr,
+                                                         int width,
+                                                         uint8_t mask);
+#  endif /*__AVX__*/
+#endif   // SANITIZER_GO
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX2_H*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
new file mode 100644
index 0000000000000..ab8fbf2af3a76
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
@@ -0,0 +1,44 @@
+#include "tsan_interface_avx512.h"
+
+#include <immintrin.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <unistd.h>
+
+#include "sanitizer_common/sanitizer_internal_defs.h"
+#include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
+
+#define CALLERPC ((uptr)__builtin_return_address(0))
+
+using namespace __tsan;
+
+#ifdef __AVX512F__
+// Reports a `size`-byte write at each of the eight addresses packed in `vaddr`
+// whose mask bit is set (bit i of `mask` selects lane i).
+extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
+  void *addr[8] = {};
+  // Spill all eight lanes with one unaligned 512-bit store: addr[] is not
+  // guaranteed to meet the alignment the aligned store forms require.
+  _mm512_storeu_si512(addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 8; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessWrite);
+}
+
+// Read counterpart of __tsan_scatter_vector8: reports a `size`-byte read for
+// every address in `vaddr` whose lane is selected by `mask`.
+extern "C" void __tsan_gather_vector8(__m512i vaddr, int size, uint8_t mask) {
+  void *addr[8] = {};
+  // Single unaligned 512-bit store, as in __tsan_scatter_vector8.
+  _mm512_storeu_si512(addr, vaddr);
+  uptr pc = CALLERPC;
+  ThreadState *thr = cur_thread();
+  for (int i = 0; i < 8; i++)
+    if ((mask >> i) & 1)
+      UnalignedMemoryAccess(thr, pc, (uptr)addr[i], size, kAccessRead);
+}
+#endif /*__AVX512F__*/
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
new file mode 100644
index 0000000000000..179f64a89a9f1
--- /dev/null
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
@@ -0,0 +1,46 @@
+//===-- tsan_interface_avx512.h ----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is a part of ThreadSanitizer (TSan), a race detector.
+//
+// The functions declared in this header will be inserted by the instrumentation
+// module.
+// This header can be included by the instrumented program or by TSan tests.
+//===----------------------------------------------------------------------===//
+#ifndef TSAN_INTERFACE_AVX512_H
+#define TSAN_INTERFACE_AVX512_H
+
+#include <immintrin.h>
+#include <sanitizer_common/sanitizer_internal_defs.h>
+#include <stdint.h>
+using __sanitizer::tid_t;
+using __sanitizer::uptr;
+
+// This header should NOT include any headers besides the three above.
+// All functions in this header are extern "C" and start with __tsan_.
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !SANITIZER_GO
+#  ifdef __AVX512F__
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_scatter_vector8(__m512i vaddr,
+                                                          int width,
+                                                          uint8_t mask);
+SANITIZER_INTERFACE_ATTRIBUTE void __tsan_gather_vector8(__m512i vaddr,
+                                                         int width,
+                                                         uint8_t mask);
+#  endif /*__AVX512F__*/
+#endif   // SANITIZER_GO
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif /*TSAN_INTERFACE_AVX512_H*/
diff --git a/compiler-rt/test/tsan/simd_broadcast_norace.c b/compiler-rt/test/tsan/simd_broadcast_norace.c
new file mode 100644
index 0000000000000..3a7c2cfe279dc
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE c;
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += c;
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_broadcast_race.c b/compiler-rt/test/tsan/simd_broadcast_race.c
new file mode 100644
index 0000000000000..08d6207ede722
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_broadcast_race.c
@@ -0,0 +1,43 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += A[64];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gather_race.c b/compiler-rt/test/tsan/simd_gather_race.c
new file mode 100644
index 0000000000000..1d7c68a0bc93e
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gather_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j + CHUNK_SIZE] = A[j * 2] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_gatherscatter_norace.c b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
new file mode 100644
index 0000000000000..3f5994119223c
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j * 2] += B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_loadstore_norace.c b/compiler-rt/test/tsan/simd_loadstore_norace.c
new file mode 100644
index 0000000000000..ed3421d026a8e
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_loadstore_norace.c
@@ -0,0 +1,45 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j] += B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_loadstore_race.c b/compiler-rt/test/tsan/simd_loadstore_race.c
new file mode 100644
index 0000000000000..4525404d1b8e5
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_loadstore_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 256
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j + 64] = A[j] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_norace.c b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
new file mode 100644
index 0000000000000..1526e3c9e05e5
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
@@ -0,0 +1,56 @@
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=double %s -o %t && %run %t 2>&1 | FileCheck %s
+#include "test.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+
+#if SIMDLEN == 4
+#  define tsan_scatter_func __tsan_scatter_vector4
+#  define intri_type __m256i
+#elif SIMDLEN == 8
+#  define tsan_scatter_func __tsan_scatter_vector8
+#  define intri_type __m512i
+#endif
+
+extern void tsan_scatter_func(intri_type, int, uint8_t);
+TYPE A[8];
+
+__attribute__((disable_sanitizer_instrumentation)) void *Thread(uint8_t mask) {
+#if SIMDLEN == 4
+  __m256i vaddr = _mm256_set_epi64x(
+#elif SIMDLEN == 8
+  __m512i vaddr = _mm512_set_epi64(
+      (int64_t)(A + 7), (int64_t)(A + 6), (int64_t)(A + 5), (int64_t)(A + 4),
+#endif
+      (int64_t)(A + 3), (int64_t)(A + 2), (int64_t)(A + 1), (int64_t)(A + 0));
+  tsan_scatter_func(vaddr, sizeof(TYPE), mask);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0b01010101); }
+
+void *Thread2(void *x) { return Thread(0b10101010); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  fprintf(stderr, "DONE.\n");
+  return 0;
+}
+
+// CHECK-NOT: WARNING: ThreadSanitizer: data race
+// CHECK-NOT: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_race.c b/compiler-rt/test/tsan/simd_scatter_mask_race.c
new file mode 100644
index 0000000000000..8bf634494028a
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_mask_race.c
@@ -0,0 +1,55 @@
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=double %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+#include <immintrin.h>
+#include <stdint.h>
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+
+#if SIMDLEN == 4
+#  define tsan_scatter_func __tsan_scatter_vector4
+#  define intri_type __m256i
+#elif SIMDLEN == 8
+#  define tsan_scatter_func __tsan_scatter_vector8
+#  define intri_type __m512i
+#endif
+
+extern void tsan_scatter_func(intri_type, int, uint8_t);
+TYPE A[8];
+
+__attribute__((disable_sanitizer_instrumentation)) void *Thread(uint8_t mask) {
+#if SIMDLEN == 4
+  __m256i vaddr = _mm256_set_epi64x(
+#elif SIMDLEN == 8
+  __m512i vaddr = _mm512_set_epi64(
+      (int64_t)(A + 7), (int64_t)(A + 6), (int64_t)(A + 5), (int64_t)(A + 4),
+#endif
+      (int64_t)(A + 3), (int64_t)(A + 2), (int64_t)(A + 1), (int64_t)(A + 0));
+  tsan_scatter_func(vaddr, sizeof(TYPE), mask);
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0b01010101); }
+
+void *Thread2(void *x) { return Thread(0b10101011); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/compiler-rt/test/tsan/simd_scatter_race.c b/compiler-rt/test/tsan/simd_scatter_race.c
new file mode 100644
index 0000000000000..84f9f4d51cab9
--- /dev/null
+++ b/compiler-rt/test/tsan/simd_scatter_race.c
@@ -0,0 +1,44 @@
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+#include "test.h"
+
+#ifndef SIMDLEN
+#  define SIMDLEN 8
+#endif /*SIMDLEN*/
+#ifndef TYPE
+#  define TYPE double
+#endif /*TYPE*/
+#define LEN 2000
+#define CHUNK_SIZE 64
+
+TYPE A[2 * LEN];
+TYPE B[LEN];
+
+void *Thread(intptr_t offset) {
+  for (intptr_t i = offset; i < LEN; i += (2 * CHUNK_SIZE)) {
+#pragma omp simd simdlen(SIMDLEN)
+    for (intptr_t j = i; j < i + CHUNK_SIZE; j++)
+      A[j * 2] = A[j + CHUNK_SIZE] + B[j];
+  }
+  barrier_wait(&barrier);
+  return NULL;
+}
+
+void *Thread1(void *x) { return Thread(0); }
+
+void *Thread2(void *x) { return Thread(CHUNK_SIZE); }
+
+int main() {
+  barrier_init(&barrier, 2);
+  pthread_t t[2];
+  pthread_create(&t[0], NULL, Thread1, NULL);
+  pthread_create(&t[1], NULL, Thread2, NULL);
+  pthread_join(t[0], NULL);
+  pthread_join(t[1], NULL);
+  return 0;
+}
+
+// CHECK: WARNING: ThreadSanitizer: data race
+// CHECK: SUMMARY: ThreadSanitizer: data race{{.*}}Thread
diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 8ee0bca7e354f..78b4f8edd3468 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -67,6 +67,9 @@ static cl::opt<bool> ClInstrumentAtomics("tsan-instrument-atomics",
                                          cl::init(true),
                                          cl::desc("Instrument atomics"),
                                          cl::Hidden);
+static cl::opt<bool> ClInstrumentSimd("tsan-instrument-simd", cl::init(true),
+                                      cl::desc("Instrument simd instructions"),
+                                      cl::Hidden);
 static cl::opt<bool> ClInstrumentMemIntrinsics(
     "tsan-instrument-memintrinsics", cl::init(true),
     cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
@@ -142,6 +145,7 @@ struct ThreadSanitizer {
   bool addrPointsToConstantData(Value *Addr);
   int getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr, const DataLayout &DL);
   void InsertRuntimeIgnores(Function &F);
+  bool instrumentGatherOrScatter(Instruction *I, const DataLayout &DL);
 
   Type *IntptrTy;
   FunctionCallee TsanFuncEntry;
@@ -149,7 +153,7 @@ struct ThreadSanitizer {
   FunctionCallee TsanIgnoreBegin;
   FunctionCallee TsanIgnoreEnd;
   // Accesses sizes are powers of two: 1, 2, 4, 8, 16.
-  static const size_t kNumberOfAccessSizes = 5;
+  static const size_t kNumberOfAccessSizes = 9;
   FunctionCallee TsanRead[kNumberOfAccessSizes];
   FunctionCallee TsanWrite[kNumberOfAccessSizes];
   FunctionCallee TsanUnalignedRead[kNumberOfAccessSizes];
@@ -170,6 +174,8 @@ struct ThreadSanitizer {
   FunctionCallee TsanVptrUpdate;
   FunctionCallee TsanVptrLoad;
   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
+  FunctionCallee TsanVectorScatter[2];
+  FunctionCallee TsanVectorGather[2];
 };
 
 void insertModuleCtor(Module &M) {
@@ -213,6 +219,26 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
   TsanIgnoreEnd =
       M.getOrInsertFunction("__tsan_ignore_thread_end", Attr, IRB.getVoidTy());
   IntegerType *OrdTy = IRB.getInt32Ty();
+
+  // All gather/scatter hooks share one signature:
+  //   void(<N x intptr> addresses, i32 bytes per element, i8 lane mask).
+  TsanVectorScatter[0] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_scatter_vector4"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  TsanVectorScatter[1] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_scatter_vector8"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  TsanVectorGather[0] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_gather_vector4"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+  TsanVectorGather[1] = M.getOrInsertFunction(
+      SmallString<32>("__tsan_gather_vector8"), Attr, IRB.getVoidTy(),
+      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
+      IRB.getInt32Ty(), IRB.getInt8Ty());
+
   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {
     const unsigned ByteSize = 1U << i;
     const unsigned BitSize = ByteSize * 8;
@@ -506,30 +532,40 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
 
   initialize(*F.getParent(), TLI);
   SmallVector<InstructionInfo, 8> AllLoadsAndStores;
-  SmallVector<Instruction*, 8> LocalLoadsAndStores;
-  SmallVector<Instruction*, 8> AtomicAccesses;
-  SmallVector<Instruction*, 8> MemIntrinCalls;
+  SmallVector<Instruction *, 8> LocalLoadsAndStores;
+  SmallVector<Instruction *, 8> AtomicAccesses;
+  SmallVector<Instruction *, 8> MemIntrinCalls;
+  SmallVector<Instruction *, 8> AllGathersAndScatters;
+
   bool Res = false;
   bool HasCalls = false;
   bool SanitizeFunction = F.hasFnAttribute(Attribute::SanitizeThread);
   const DataLayout &DL = F.getParent()->getDataLayout();
 
-  // Traverse all instructions, collect loads/stores/returns, check for calls.
+  // Traverse all instructions, collect loads/stores/returns/gathers/scatters,
+  // check for calls.
   for (auto &BB : F) {
     for (auto &Inst : BB) {
       // Skip instructions inserted by another instrumentation.
       if (Inst.hasMetadata(LLVMContext::MD_nosanitize))
         continue;
-      if (isTsanAtomic(&Inst))
+      if (isTsanAtomic(&Inst)) {
         AtomicAccesses.push_back(&Inst);
-      else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst))
+      } else if (isa<LoadInst>(Inst) || isa<StoreInst>(Inst)) {
         LocalLoadsAndStores.push_back(&Inst);
-      else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
-               isa<InvokeInst>(Inst)) {
-        if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+      } else if ((isa<CallInst>(Inst) && !isa<DbgInfoIntrinsic>(Inst)) ||
+                 isa<InvokeInst>(Inst)) {
+        if (CallInst *CI = dyn_cast<CallInst>(&Inst)) {
+          // Identify masked gathers/scatters by intrinsic ID, not by name.
+          Intrinsic::ID IID = CI->getIntrinsicID();
+          if (IID == Intrinsic::masked_scatter ||
+              IID == Intrinsic::masked_gather)
+            AllGathersAndScatters.push_back(&Inst);
           maybeMarkSanitizerLibraryCallNoBuiltin(CI, &TLI);
-        if (isa<MemIntrinsic>(Inst))
+        }
+        if (isa<MemIntrinsic>(Inst)) {
           MemIntrinCalls.push_back(&Inst);
+        }
         HasCalls = true;
         chooseInstructionsToInstrument(LocalLoadsAndStores, AllLoadsAndStores,
                                        DL);
@@ -548,6 +584,12 @@ bool ThreadSanitizer::sanitizeFunction(Function &F,
       Res |= instrumentLoadOrStore(II, DL);
     }
 
+  // Instrument gather and scatter memory accesses
+  if (ClInstrumentSimd && ClInstrumentMemoryAccesses && SanitizeFunction)
+    for (const auto &II : AllGathersAndScatters) {
+      Res |= instrumentGatherOrScatter(II, DL);
+    }
+
   // Instrument atomic memory accesses in any case (they can be used to
   // implement synchronization).
   if (ClInstrumentAtomics)
@@ -658,6 +700,49 @@ bool ThreadSanitizer::instrumentLoadOrStore(const InstructionInfo &II,
   return true;
 }
 
+/// Insert a call to the matching __tsan_{gather,scatter}_vector{4,8} hook in
+/// front of an llvm.masked.gather / llvm.masked.scatter call \p I.
+///
+/// The hook is passed the vector of accessed addresses (as pointer-sized
+/// integers), the size of one element in bytes, and the lane mask packed
+/// into an i8. Returns true if a call was inserted and false if the access
+/// is not a fixed-width vector of 4 or 8 lanes, for which no hook exists.
+bool ThreadSanitizer::instrumentGatherOrScatter(Instruction *I,
+                                                const DataLayout &DL) {
+  // The collector only records calls with a known callee, so the callee is
+  // non-null here and cast<> (which asserts) is preferred over dyn_cast<>.
+  StringRef FunctionNameRef =
+      cast<CallInst>(I)->getCalledFunction()->getName();
+  bool IsScatter = FunctionNameRef.contains("scatter");
+  // Operands: scatter(value, ptrs, align, mask),
+  //           gather(ptrs, align, mask, passthru).
+  Value *Data = I->getOperand(IsScatter ? 0 : 3);
+  Value *Ptrs = I->getOperand(IsScatter ? 1 : 0);
+  Value *Mask = I->getOperand(IsScatter ? 3 : 2);
+
+  auto *DataTy = dyn_cast<FixedVectorType>(Data->getType());
+  if (!DataTy)
+    return false;
+  unsigned NumElements = DataTy->getNumElements();
+  if (NumElements != 4 && NumElements != 8)
+    return false;
+  unsigned BytesPerElement = DL.getTypeSizeInBits(DataTy) / NumElements / 8;
+
+  FunctionCallee *TsanVector = IsScatter ? TsanVectorScatter : TsanVectorGather;
+  FunctionCallee Callee = TsanVector[NumElements == 4 ? 0 : 1];
+
+  InstrumentationIRBuilder IRB(I);
+  // The hooks are declared with integer address vectors, not <N x ptr>.
+  Value *Addrs =
+      IRB.CreatePtrToInt(Ptrs, Callee.getFunctionType()->getParamType(0));
+  // <N x i1> may only be bitcast to iN; zero-extend that to the i8 parameter.
+  Value *MaskBits = IRB.CreateZExt(
+      IRB.CreateBitCast(Mask, IRB.getIntNTy(NumElements)), IRB.getInt8Ty());
+  IRB.CreateCall(Callee, {Addrs, IRB.getInt32(BytesPerElement), MaskBits});
+
+  return true;
+}
+
 static ConstantInt *createOrdering(IRBuilder<> *IRB, AtomicOrdering ord) {
   uint32_t v = 0;
   switch (ord) {
@@ -804,8 +889,8 @@ int ThreadSanitizer::getMemoryAccessFuncIndex(Type *OrigTy, Value *Addr,
                                               const DataLayout &DL) {
   assert(OrigTy->isSized());
   uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
-  if (TypeSize != 8  && TypeSize != 16 &&
-      TypeSize != 32 && TypeSize != 64 && TypeSize != 128) {
+  if (TypeSize != 8 && TypeSize != 16 && TypeSize != 32 && TypeSize != 64 &&
+      TypeSize != 128 && TypeSize != 256 && TypeSize != 512) {
     NumAccessesWithBadSize++;
     // Ignore all unusual sizes.
     return -1;
diff --git a/openmp/tools/archer/tests/lit.cfg b/openmp/tools/archer/tests/lit.cfg
index 692cbfe97cf1e..cd19f9e95f700 100644
--- a/openmp/tools/archer/tests/lit.cfg
+++ b/openmp/tools/archer/tests/lit.cfg
@@ -115,7 +115,7 @@ config.substitutions.append(("%libarcher-cxx-compile-and-run", \
 config.substitutions.append(("%libarcher-cxx-compile", \
     "%clang-archerXX %openmp_flags %archer_flags %flags -std=c++17 %s -o %t" + libs))
 config.substitutions.append(("%libarcher-compile", \
-                             "%clang-archer %openmp_flags %archer_flags %flags %s -o %t" + libs))
+                             "%clang-archer %openmp_flags %archer_flags %flags -march=native %s -o %t" + libs))
 config.substitutions.append(("%libarcher-run-race", "%suppression %deflake %t 2>&1 | tee %t.log"))
 config.substitutions.append(("%libarcher-run-nosuppression", "%nosuppression %t 2>&1 | tee %t.log"))
 config.substitutions.append(("%libarcher-run", "%suppression %t 2>&1 | tee %t.log"))
diff --git a/openmp/tools/archer/tests/simd/simd-broadcast-no.c b/openmp/tools/archer/tests/simd/simd-broadcast-no.c
new file mode 100644
index 0000000000000..41bf837e8e478
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-broadcast-no.c
@@ -0,0 +1,44 @@
+/*
+ * simd-broadcast-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+  if (argc > 1)
+    len = atoi(argv[1]);
+  double a[len];
+  for (int i = 0; i < len; i++)
+    a[i] = i;
+  TYPE c = M_PI;
+
+#pragma omp parallel for simd num_threads(2) schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + c;
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-broadcast-yes.c b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
new file mode 100644
index 0000000000000..23a0f545092ab
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
@@ -0,0 +1,55 @@
+/*
+ * simd-broadcast-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s 
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s 
+// REQUIRES: tsan
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[len];
+  for (int i = 0; i < len; i++)
+    a[i] = i;
+  double c = M_PI;
+
+#pragma omp parallel for simd num_threads(2) schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + a[64];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-broadcast-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-broadcast-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-broadcast-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-broadcast-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-gather-yes.c b/openmp/tools/archer/tests/simd/simd-gather-yes.c
new file mode 100644
index 0000000000000..881ab6f17dabc
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-gather-yes.c
@@ -0,0 +1,63 @@
+/*
+ * simd-gather-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+#ifndef SIMDLEN
+#define SIMDLEN 8
+#endif /*SIMDLEN*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64) simdlen(SIMDLEN)
+  for (int i = 0; i < len; i++)
+    a[i + 64] = a[i * 2] + b[i];
+
+  fprintf(stderr, "DONE\n"); // unbuffered stderr keeps DONE ordered in the log
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-gather-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-gather-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-gather-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-gather-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c b/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
new file mode 100644
index 0000000000000..9c4d659ea2617
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-gatherscatter-no.c
@@ -0,0 +1,46 @@
+/*
+ * simd-gatherscatter-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i * 2] = a[i * 2] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-loadstore-no.c b/openmp/tools/archer/tests/simd/simd-loadstore-no.c
new file mode 100644
index 0000000000000..1471f65639d77
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-loadstore-no.c
@@ -0,0 +1,46 @@
+/*
+ * simd-loadstore-no.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run | FileCheck %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run | FileCheck %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[len], b[len];
+
+  for (int i = 0; i < len; i++) {
+    a[i] = i;
+    b[i] = i + 1;
+  }
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len; i++)
+    a[i] = a[i] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// CHECK-NOT: ThreadSanitizer: data race
+// CHECK-NOT: ThreadSanitizer: reported
+// CHECK: DONE
diff --git a/openmp/tools/archer/tests/simd/simd-loadstore-yes.c b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
new file mode 100644
index 0000000000000..d122a0bda2afa
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
@@ -0,0 +1,57 @@
+/*
+ * simd-loadstore-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[len], b[len]; // element type is chosen by -DTYPE on the RUN lines
+
+  for (int i = 0; i < len; i++) {
+    a[i] = i;
+    b[i] = i + 1;
+  }
+
+#pragma omp parallel for simd schedule(dynamic, 64)
+  for (int i = 0; i < len - 64; i++)
+    a[i + 64] = a[i] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-loadstore-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-loadstore-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-loadstore-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-loadstore-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings
diff --git a/openmp/tools/archer/tests/simd/simd-scatter-yes.c b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
new file mode 100644
index 0000000000000..fcf3178381572
--- /dev/null
+++ b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
@@ -0,0 +1,63 @@
+/*
+ * simd-scatter-yes.c -- Archer testcase
+ */
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+//
+// See tools/LICENSE.txt for details.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// REQUIRES: tsan
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#ifndef TYPE
+#define TYPE double
+#endif /*TYPE*/
+
+#ifndef SIMDLEN
+#define SIMDLEN 8
+#endif /*SIMDLEN*/
+
+int main(int argc, char *argv[]) {
+  int len = 20000;
+
+  if (argc > 1)
+    len = atoi(argv[1]);
+  TYPE a[2 * len], b[len];
+
+  for (int i = 0; i < 2 * len; i++)
+    a[i] = i;
+  for (int i = 0; i < len; i++)
+    b[i] = i + 1;
+
+#pragma omp parallel for simd schedule(dynamic, 64) simdlen(SIMDLEN)
+  for (int i = 0; i < len; i++)
+    a[i * 2] = a[i + 64] + b[i];
+
+  fprintf(stderr, "DONE\n");
+  return 0;
+}
+
+// FLOAT: WARNING: ThreadSanitizer: data race
+// FLOAT-NEXT:   {{(Write|Read)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-scatter-yes.c
+// FLOAT:   Previous {{(read|write)}} of size {{(4|8)}}
+// FLOAT-NEXT: #0 {{.*}}simd-scatter-yes.c
+
+// DOUBLE: WARNING: ThreadSanitizer: data race
+// DOUBLE-NEXT:   {{(Write|Read)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-scatter-yes.c
+// DOUBLE:   Previous {{(read|write)}} of size 8
+// DOUBLE-NEXT: #0 {{.*}}simd-scatter-yes.c
+
+// CHECK: DONE
+// CHECK: ThreadSanitizer: reported {{[0-9]+}} warnings

>From e54345701a10dc8b161eb9353e2fd7dc86bbc44f Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Wed, 6 Dec 2023 19:01:57 +0100
Subject: [PATCH 02/14] Revert unwanted changes

---
 compiler-rt/cmake/Modules/AddCompilerRT.cmake | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/compiler-rt/cmake/Modules/AddCompilerRT.cmake b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
index d85d649619032..4d9b68a3cc25b 100644
--- a/compiler-rt/cmake/Modules/AddCompilerRT.cmake
+++ b/compiler-rt/cmake/Modules/AddCompilerRT.cmake
@@ -385,8 +385,8 @@ function(add_compiler_rt_runtime name type)
       target_link_libraries(${libname} PRIVATE ${builtins_${libname}})
     endif()
     if(${type} STREQUAL "SHARED")
-      if(COMMAND llvm_setup_rpath)
-        llvm_setup_rpath(${libname})
+      if(APPLE OR WIN32)
+        set_property(TARGET ${libname} PROPERTY BUILD_WITH_INSTALL_RPATH ON)
       endif()
       if(WIN32 AND NOT CYGWIN AND NOT MINGW)
         set_target_properties(${libname} PROPERTIES IMPORT_PREFIX "")

>From 0415d2c6af0df940fc671f4351a6b5328029cd1f Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Wed, 6 Dec 2023 20:14:13 +0100
Subject: [PATCH 03/14] Fix format

---
 compiler-rt/lib/tsan/rtl/tsan_interface.cpp         |  4 ++--
 compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h      |  2 +-
 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h    |  2 +-
 openmp/tools/archer/tests/simd/simd-broadcast-yes.c |  6 ++++--
 openmp/tools/archer/tests/simd/simd-gather-yes.c    | 12 ++++++++----
 openmp/tools/archer/tests/simd/simd-loadstore-yes.c |  6 ++++--
 openmp/tools/archer/tests/simd/simd-scatter-yes.c   | 12 ++++++++----
 7 files changed, 28 insertions(+), 16 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
index c97cf62e2e9bd..7dff3ecc41ff5 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface.cpp
@@ -11,11 +11,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "tsan_interface.h"
-#include "tsan_interface_ann.h"
-#include "tsan_rtl.h"
 
 #include "sanitizer_common/sanitizer_internal_defs.h"
 #include "sanitizer_common/sanitizer_ptrauth.h"
+#include "tsan_interface_ann.h"
+#include "tsan_rtl.h"
 
 #define CALLERPC ((uptr)__builtin_return_address(0))
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
index 84c001be8855b..f566e12aae755 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx2.h
@@ -1,4 +1,4 @@
-//===-- tsan_interface_avx2.h ----------------------------------------*- C++ -*-===//
+//===-- tsan_interface_avx2.h ------------------------------------- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
index 179f64a89a9f1..10cdab2076630 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.h
@@ -1,4 +1,4 @@
-//===-- tsan_interface_avx512.h ----------------------------------------*- C++ -*-===//
+//===-- tsan_interface_avx512.h ----------------------------------- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
diff --git a/openmp/tools/archer/tests/simd/simd-broadcast-yes.c b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
index 23a0f545092ab..f8e945b9d52b5 100644
--- a/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
+++ b/openmp/tools/archer/tests/simd/simd-broadcast-yes.c
@@ -10,8 +10,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s 
-// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s 
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
 // REQUIRES: tsan
 
 #include <math.h>
diff --git a/openmp/tools/archer/tests/simd/simd-gather-yes.c b/openmp/tools/archer/tests/simd/simd-gather-yes.c
index 881ab6f17dabc..59770ada9102d 100644
--- a/openmp/tools/archer/tests/simd/simd-gather-yes.c
+++ b/openmp/tools/archer/tests/simd/simd-gather-yes.c
@@ -10,10 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
-// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
-// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
-// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
 // REQUIRES: tsan
 
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/simd/simd-loadstore-yes.c b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
index d122a0bda2afa..bc8f863c52397 100644
--- a/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
+++ b/openmp/tools/archer/tests/simd/simd-loadstore-yes.c
@@ -10,8 +10,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
-// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=float && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
 // REQUIRES: tsan
 
 #include <stdio.h>
diff --git a/openmp/tools/archer/tests/simd/simd-scatter-yes.c b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
index fcf3178381572..ac9238ff11f91 100644
--- a/openmp/tools/archer/tests/simd/simd-scatter-yes.c
+++ b/openmp/tools/archer/tests/simd/simd-scatter-yes.c
@@ -10,10 +10,14 @@
 //
 //===----------------------------------------------------------------------===//
 
-// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
-// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=FLOAT %s
-// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
-// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=4 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=float -DSIMDLEN=8 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=FLOAT %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=4 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
+// RUN: %libarcher-compile -DTYPE=double -DSIMDLEN=8 && %libarcher-run-race \
+// RUN: | FileCheck --check-prefix=DOUBLE %s
 // REQUIRES: tsan
 
 #include <stdio.h>

>From 893b94f8c1ccb2bba6a2d6a2e483bd5521923894 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 14:27:10 +0100
Subject: [PATCH 04/14] Cleanup

---
 compiler-rt/lib/tsan/rtl/CMakeLists.txt | 1 -
 1 file changed, 1 deletion(-)

diff --git a/compiler-rt/lib/tsan/rtl/CMakeLists.txt b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
index 4df1a6c8fca89..93fa0523ae4c3 100644
--- a/compiler-rt/lib/tsan/rtl/CMakeLists.txt
+++ b/compiler-rt/lib/tsan/rtl/CMakeLists.txt
@@ -245,7 +245,6 @@ else()
         ARCHS ${arch}
         SOURCES tsan_interface_avx2.cpp
         ADDITIONAL_HEADERS tsan_interface_avx2.h 
-        #CFLAGS ${TSAN_RTL_CFLAGS} $<IF:"$COMPILER_RT_HAS_MAVX2_FLAG","-mavx2","">)
         CFLAGS ${TSAN_RTL_CFLAGS} $<IF:$<BOOL:${COMPILER_RT_HAS_MAVX2_FLAG}>,-mavx2,"">)
     add_compiler_rt_object_libraries(RTTSanAVX512
         ARCHS ${arch}

>From 1f9ad09958d0975bc9e88303bf81b9ad53418d06 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 14:28:14 +0100
Subject: [PATCH 05/14] Fix extraction intrinsics in tsan avx512 runtime

---
 compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
index ab8fbf2af3a76..8406c1b5c755b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interface_avx512.cpp
@@ -17,8 +17,8 @@ using namespace __tsan;
 #ifdef __AVX512F__
 extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
   void *addr[8] = {};
-  __m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
-  __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
+  __m256i v256_1 = _mm512_castsi512_si256(vaddr);
+  __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 1);
   _mm256_store_si256((__m256i *)addr, v256_1);
   _mm256_store_si256((__m256i *)&(addr[4]), v256_2);
   uptr pc = CALLERPC;
@@ -30,8 +30,8 @@ extern "C" void __tsan_scatter_vector8(__m512i vaddr, int size, uint8_t mask) {
 
 extern "C" void __tsan_gather_vector8(__m512i vaddr, int size, uint8_t mask) {
   void *addr[8] = {};
-  __m256i v256_1 = _mm512_extracti64x4_epi64(vaddr, 0);
-  __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 4);
+  __m256i v256_1 = _mm512_castsi512_si256(vaddr);
+  __m256i v256_2 = _mm512_extracti64x4_epi64(vaddr, 1);
   _mm256_store_si256((__m256i *)addr, v256_1);
   _mm256_store_si256((__m256i *)(&addr[4]), v256_2);
   uptr pc = CALLERPC;

>From f6368e4651c07818336b99e11b0c7aaec8db3787 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 14:29:19 +0100
Subject: [PATCH 06/14] Adapt tsan simd tests to use specific vectorization
 flags for building

---
 compiler-rt/test/tsan/lit.cfg.py                  | 2 ++
 compiler-rt/test/tsan/lit.site.cfg.py.in          | 2 ++
 compiler-rt/test/tsan/simd_broadcast_norace.c     | 8 ++++----
 compiler-rt/test/tsan/simd_broadcast_race.c       | 8 ++++----
 compiler-rt/test/tsan/simd_gather_race.c          | 8 ++++----
 compiler-rt/test/tsan/simd_gatherscatter_norace.c | 8 ++++----
 compiler-rt/test/tsan/simd_loadstore_norace.c     | 8 ++++----
 compiler-rt/test/tsan/simd_loadstore_race.c       | 8 ++++----
 compiler-rt/test/tsan/simd_scatter_race.c         | 8 ++++----
 9 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/compiler-rt/test/tsan/lit.cfg.py b/compiler-rt/test/tsan/lit.cfg.py
index a93333e2e593d..017dc9a981297 100644
--- a/compiler-rt/test/tsan/lit.cfg.py
+++ b/compiler-rt/test/tsan/lit.cfg.py
@@ -77,6 +77,8 @@ def get_required_attr(config, attr_name):
 else:
     config.substitutions.append(("%link_libcxx_tsan", ""))
 
+config.substitutions.append(("%avx2", "-mavx2" if config.target_has_mavx2 else ""))
+config.substitutions.append(("%avx512f", "-mavx512f" if config.target_has_mavx512f else ""))
 
 def build_invocation(compile_flags):
     return " " + " ".join([config.clang] + compile_flags) + " "
diff --git a/compiler-rt/test/tsan/lit.site.cfg.py.in b/compiler-rt/test/tsan/lit.site.cfg.py.in
index c6d453aaee26f..98c752c5ec2d1 100644
--- a/compiler-rt/test/tsan/lit.site.cfg.py.in
+++ b/compiler-rt/test/tsan/lit.site.cfg.py.in
@@ -6,6 +6,8 @@ config.has_libcxx = @TSAN_HAS_LIBCXX@
 config.apple_platform = "@TSAN_TEST_APPLE_PLATFORM@"
 config.apple_platform_min_deployment_target_flag = "@TSAN_TEST_MIN_DEPLOYMENT_TARGET_FLAG@"
 config.target_cflags = "@TSAN_TEST_TARGET_CFLAGS@"
+config.target_has_mavx2 = "@COMPILER_RT_HAS_MAVX2_FLAG@".upper() in ("1", "ON", "TRUE", "YES")
+config.target_has_mavx512f = "@COMPILER_RT_HAS_MAVX512F_FLAG@".upper() in ("1", "ON", "TRUE", "YES")
 config.target_arch = "@TSAN_TEST_TARGET_ARCH@"
 config.deflake_threshold = "@TSAN_TEST_DEFLAKE_THRESHOLD@"
 
diff --git a/compiler-rt/test/tsan/simd_broadcast_norace.c b/compiler-rt/test/tsan/simd_broadcast_norace.c
index 3a7c2cfe279dc..28f232b161da1 100644
--- a/compiler-rt/test/tsan/simd_broadcast_norace.c
+++ b/compiler-rt/test/tsan/simd_broadcast_norace.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_broadcast_race.c b/compiler-rt/test/tsan/simd_broadcast_race.c
index 08d6207ede722..87c75d7abcdbf 100644
--- a/compiler-rt/test/tsan/simd_broadcast_race.c
+++ b/compiler-rt/test/tsan/simd_broadcast_race.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_gather_race.c b/compiler-rt/test/tsan/simd_gather_race.c
index 1d7c68a0bc93e..d5db72b6c7340 100644
--- a/compiler-rt/test/tsan/simd_gather_race.c
+++ b/compiler-rt/test/tsan/simd_gather_race.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_gatherscatter_norace.c b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
index 3f5994119223c..f62729847da54 100644
--- a/compiler-rt/test/tsan/simd_gatherscatter_norace.c
+++ b/compiler-rt/test/tsan/simd_gatherscatter_norace.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_loadstore_norace.c b/compiler-rt/test/tsan/simd_loadstore_norace.c
index ed3421d026a8e..dc9505409838d 100644
--- a/compiler-rt/test/tsan/simd_loadstore_norace.c
+++ b/compiler-rt/test/tsan/simd_loadstore_norace.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_loadstore_race.c b/compiler-rt/test/tsan/simd_loadstore_race.c
index 4525404d1b8e5..f488755943dff 100644
--- a/compiler-rt/test/tsan/simd_loadstore_race.c
+++ b/compiler-rt/test/tsan/simd_loadstore_race.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN
diff --git a/compiler-rt/test/tsan/simd_scatter_race.c b/compiler-rt/test/tsan/simd_scatter_race.c
index 84f9f4d51cab9..03617a58b2357 100644
--- a/compiler-rt/test/tsan/simd_scatter_race.c
+++ b/compiler-rt/test/tsan/simd_scatter_race.c
@@ -1,7 +1,7 @@
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
-// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 -march=native -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=float -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=4 -DTYPE=double -O3 %avx2 -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=float -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
+// RUN: %clang_tsan -DSIMDLEN=8 -DTYPE=double -O3 %avx512f -fopenmp-simd %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 #include "test.h"
 
 #ifndef SIMDLEN

>From 80554aedb6a57138766e013ccb6b86c8ca4b2b98 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 14:48:50 +0100
Subject: [PATCH 07/14] Fix lit config format

---
 compiler-rt/test/tsan/lit.cfg.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/test/tsan/lit.cfg.py b/compiler-rt/test/tsan/lit.cfg.py
index 017dc9a981297..0a77d30f4458b 100644
--- a/compiler-rt/test/tsan/lit.cfg.py
+++ b/compiler-rt/test/tsan/lit.cfg.py
@@ -78,7 +78,9 @@ def get_required_attr(config, attr_name):
     config.substitutions.append(("%link_libcxx_tsan", ""))
 
 config.substitutions.append(("%avx2", "-mavx2" if config.target_has_mavx2 else ""))
-config.substitutions.append(("%avx512f", "-mavx512f" if config.target_has_mavx512f else ""))
+config.substitutions.append(
+    ("%avx512f", "-mavx512f" if config.target_has_mavx512f else "")
+)
 
 def build_invocation(compile_flags):
     return " " + " ".join([config.clang] + compile_flags) + " "

>From 5ce165caf7a6ebd14e37ace4177699bee23ddd7b Mon Sep 17 00:00:00 2001
From: Joachim Jenke <jenke at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 10:38:29 +0100
Subject: [PATCH 08/14] First draft of Instrumentation test

---
 .../ThreadSanitizer/tsan_avx_loadstore.ll     | 60 +++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll

diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll
new file mode 100644
index 0000000000000..b12330c1f391f
--- /dev/null
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll
@@ -0,0 +1,60 @@
+; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -S | FileCheck %s
+
+; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <8 x i32> @read_8_int(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <8 x i32>, ptr %a, align 4
+  ret <8 x i32> %tmp1
+}
+
+define <4 x i32> @read_4_int(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <4 x i32>, ptr %a, align 4
+  ret <4 x i32> %tmp1
+}
+
+define <8 x double> @read_8_double_unaligned(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <8 x double>, ptr %a, align 4
+  ret <8 x double> %tmp1
+}
+
+define <4 x double> @read_4_double_unaligned(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <4 x double>, ptr %a, align 4
+  ret <4 x double> %tmp1
+}
+
+define <2 x double> @read_2_double_unaligned(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <2 x double>, ptr %a, align 4
+  ret <2 x double> %tmp1
+}
+
+define <8 x double> @read_8_double(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <8 x double>, ptr %a, align 8
+  ret <8 x double> %tmp1
+}
+
+define <4 x double> @read_4_double(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <4 x double>, ptr %a, align 8
+  ret <4 x double> %tmp1
+}
+
+define <2 x double> @read_2_double(ptr %a) sanitize_thread {
+entry:
+  %tmp1 = load <2 x double>, ptr %a, align 8
+  ret <2 x double> %tmp1
+}
+
+
+
+define void @write_4_double(ptr %a) sanitize_thread {
+entry:
+  store <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, ptr %a
+  ret void
+}

>From fea56d76b1f6a13c474d6f62f1a5406b1551dd3e Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 15:31:49 +0100
Subject: [PATCH 09/14] Require vectorization support for tsan masked simd
 tests

---
 compiler-rt/test/tsan/lit.cfg.py                 | 4 ++++
 compiler-rt/test/tsan/simd_scatter_mask_norace.c | 1 +
 compiler-rt/test/tsan/simd_scatter_mask_race.c   | 1 +
 3 files changed, 6 insertions(+)

diff --git a/compiler-rt/test/tsan/lit.cfg.py b/compiler-rt/test/tsan/lit.cfg.py
index 0a77d30f4458b..606024e8bfab4 100644
--- a/compiler-rt/test/tsan/lit.cfg.py
+++ b/compiler-rt/test/tsan/lit.cfg.py
@@ -77,7 +77,11 @@ def get_required_attr(config, attr_name):
 else:
     config.substitutions.append(("%link_libcxx_tsan", ""))
 
+if config.target_has_mavx2:
+    config.available_features.add("mavx2")
 config.substitutions.append(("%avx2", "-mavx2" if config.target_has_mavx2 else ""))
+if config.target_has_mavx512f:
+    config.available_features.add("mavx512f")
 config.substitutions.append(
     ("%avx512f", "-mavx512f" if config.target_has_mavx512f else "")
 )
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_norace.c b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
index 1526e3c9e05e5..80a66c3a87cd3 100644
--- a/compiler-rt/test/tsan/simd_scatter_mask_norace.c
+++ b/compiler-rt/test/tsan/simd_scatter_mask_norace.c
@@ -1,3 +1,4 @@
+// REQUIRES: mavx512f
 // RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %run %t 2>&1 | FileCheck %s
 // RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %run %t 2>&1 | FileCheck %s
diff --git a/compiler-rt/test/tsan/simd_scatter_mask_race.c b/compiler-rt/test/tsan/simd_scatter_mask_race.c
index 8bf634494028a..22e5ae82d9384 100644
--- a/compiler-rt/test/tsan/simd_scatter_mask_race.c
+++ b/compiler-rt/test/tsan/simd_scatter_mask_race.c
@@ -1,3 +1,4 @@
+// REQUIRES: mavx512f
 // RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 // RUN: %clang_tsan -march=native -DSIMDLEN=4 -DTYPE=double %s -o %t && %deflake %run %t 2>&1 | FileCheck %s
 // RUN: %clang_tsan -march=native -DSIMDLEN=8 -DTYPE=float %s -o %t && %deflake %run %t 2>&1 | FileCheck %s

>From bfa17f8ca886dd4128218f4f30add2df17cc2132 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Thu, 7 Dec 2023 16:39:40 +0100
Subject: [PATCH 10/14] Add check lines to tsan avx loadstore instrumentation
 test

---
 .../ThreadSanitizer/tsan_avx_loadstore.ll     | 32 +++++++++++++++++--
 1 file changed, 30 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll
index b12330c1f391f..4fee2b025abaa 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_loadstore.ll
@@ -8,53 +8,81 @@ entry:
   %tmp1 = load <8 x i32>, ptr %a, align 4
   ret <8 x i32> %tmp1
 }
+; CHECK: call void @__tsan_unaligned_read32(ptr %a)
 
 define <4 x i32> @read_4_int(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <4 x i32>, ptr %a, align 4
   ret <4 x i32> %tmp1
 }
+; CHECK: call void @__tsan_unaligned_read16(ptr %a)
 
 define <8 x double> @read_8_double_unaligned(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <8 x double>, ptr %a, align 4
   ret <8 x double> %tmp1
 }
+; CHECK: call void @__tsan_unaligned_read64(ptr %a)
 
 define <4 x double> @read_4_double_unaligned(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <4 x double>, ptr %a, align 4
   ret <4 x double> %tmp1
 }
+; CHECK: call void @__tsan_unaligned_read32(ptr %a)
 
 define <2 x double> @read_2_double_unaligned(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <2 x double>, ptr %a, align 4
   ret <2 x double> %tmp1
 }
+; CHECK: call void @__tsan_unaligned_read16(ptr %a)
 
 define <8 x double> @read_8_double(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <8 x double>, ptr %a, align 8
   ret <8 x double> %tmp1
 }
+; CHECK: call void @__tsan_read64(ptr %a)
 
 define <4 x double> @read_4_double(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <4 x double>, ptr %a, align 8
   ret <4 x double> %tmp1
 }
+; CHECK: call void @__tsan_read32(ptr %a)
 
 define <2 x double> @read_2_double(ptr %a) sanitize_thread {
 entry:
   %tmp1 = load <2 x double>, ptr %a, align 8
   ret <2 x double> %tmp1
 }
+; CHECK: call void @__tsan_read16(ptr %a)
 
-
+define void @write_8_double(ptr %a) sanitize_thread {
+entry:
+  store <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, ptr %a
+  ret void
+}
+; CHECK: call void @__tsan_write64(ptr %a)
 
 define void @write_4_double(ptr %a) sanitize_thread {
 entry:
-  store <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, ptr %a
+  store <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, ptr %a
+  ret void
+}
+; CHECK: call void @__tsan_write32(ptr %a)
+
+define void @write_8_float(ptr %a) sanitize_thread {
+entry:
+  store <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, ptr %a
+  ret void
+}
+; CHECK: call void @__tsan_write32(ptr %a)
+
+define void @write_4_float(ptr %a) sanitize_thread {
+entry:
+  store <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, ptr %a
   ret void
 }
+; CHECK: call void @__tsan_write16(ptr %a)

>From 78dbd65c940e0acd25955a9823af3fe9d0f32141 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Fri, 8 Dec 2023 10:32:20 +0100
Subject: [PATCH 11/14] Fix gather & scatter tsan instrumentation function
 signatures

---
 .../lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 78b4f8edd3468..2d199dacc8dd6 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -222,21 +222,19 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
 
   TsanVectorScatter[0] = M.getOrInsertFunction(
       SmallString<32>("__tsan_scatter_vector4"), Attr, IRB.getVoidTy(),
-      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(4)),
       IRB.getInt32Ty(), IRB.getInt8Ty());
   TsanVectorScatter[1] = M.getOrInsertFunction(
       SmallString<32>("__tsan_scatter_vector8"), Attr, IRB.getVoidTy(),
-      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
-      IRB.getInt32Ty(),
-      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(8)),
       IRB.getInt32Ty(), IRB.getInt8Ty());
   TsanVectorGather[0] = M.getOrInsertFunction(
       SmallString<32>("__tsan_gather_vector4"), Attr, IRB.getVoidTy(),
-      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(4)),
+      VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(4)),
       IRB.getInt32Ty(), IRB.getInt8Ty());
   TsanVectorGather[1] = M.getOrInsertFunction(
       SmallString<32>("__tsan_gather_vector8"), Attr, IRB.getVoidTy(),
-      VectorType::get(IRB.getIntPtrTy(DL, 8), ElementCount::getFixed(8)),
+      VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(8)),
       IRB.getInt32Ty(), IRB.getInt8Ty());
 
   for (size_t i = 0; i < kNumberOfAccessSizes; ++i) {

>From 420a350fd114ba980a9f2be7d4a823c3a466ca8d Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Fri, 8 Dec 2023 10:33:33 +0100
Subject: [PATCH 12/14] Add test for tsan gather & scatter instrumentation

---
 .../ThreadSanitizer/tsan_avx_gatherscatter.ll | 71 +++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll

diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll
new file mode 100644
index 0000000000000..500fa4215c51d
--- /dev/null
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -S | FileCheck %s
+
+; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32 immarg, <8 x i1>)
+declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
+declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x double>)
+declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x float>)
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)
+
+define void @scatter_8_double_mask(<8 x double> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %a, <8 x ptr> %p, i32 8, <8 x i1> %m)
+  ret void
+}
+; CHECK: %1 = bitcast <8 x i1> %m to i8
+; CHECK-NEXT: call void @__tsan_scatter_vector8(<8 x ptr> %p, i32 8, i8 %1)
+
+define void @scatter_8_float_mask(<8 x float> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> %a, <8 x ptr> %p, i32 4, <8 x i1> %m)
+  ret void
+}
+; CHECK: %1 = bitcast <8 x i1> %m to i8
+; CHECK-NEXT: call void @__tsan_scatter_vector8(<8 x ptr> %p, i32 4, i8 %1)
+
+define void @scatter_8_double(<8 x double> %a, <8 x ptr> %p) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> %a, <8 x ptr> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+  ret void
+}
+; CHECK: call void @__tsan_scatter_vector8(<8 x ptr> %p, i32 8, i8 bitcast (<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> to i8))
+
+define void @scatter_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %a, <4 x ptr> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+  ret void
+}
+; CHECK: call void @__tsan_scatter_vector4(<4 x ptr> %p, i32 8, i8 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i8))
+
+define void @gather_8_double_mask(<8 x double> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
+entry:
+  tail call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %p, i32 8, <8 x i1> %m, <8 x double> %a)
+  ret void
+}
+; CHECK: %1 = bitcast <8 x i1> %m to i8
+; CHECK-NEXT: call void @__tsan_gather_vector8(<8 x ptr> %p, i32 8, i8 %1)
+
+define void @gather_8_float_mask(<8 x float> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
+entry:
+  tail call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> %p, i32 4, <8 x i1> %m, <8 x float> %a)
+  ret void
+}
+; CHECK: %1 = bitcast <8 x i1> %m to i8
+; CHECK-NEXT: call void @__tsan_gather_vector8(<8 x ptr> %p, i32 4, i8 %1)
+
+define void @gather_8_double(<8 x double> %a, <8 x ptr> %p) sanitize_thread {
+entry:
+  tail call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x double> %a)
+  ret void
+}
+; CHECK: call void @__tsan_gather_vector8(<8 x ptr> %p, i32 8, i8 bitcast (<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> to i8))
+
+define void @gather_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
+entry:
+  tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %p, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> %a)
+  ret void
+}
+; CHECK: call void @__tsan_gather_vector4(<4 x ptr> %p, i32 8, i8 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i8))

>From f5e62a2be42817c084540e44b2a99fd7d508a4c2 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Tue, 12 Dec 2023 08:28:59 +0100
Subject: [PATCH 13/14] Fix avx2 instrumentation using i8 instead of i4 for
 bitcast mask

---
 llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
index 2d199dacc8dd6..cad246a19092d 100644
--- a/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ThreadSanitizer.cpp
@@ -223,7 +223,7 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
   TsanVectorScatter[0] = M.getOrInsertFunction(
       SmallString<32>("__tsan_scatter_vector4"), Attr, IRB.getVoidTy(),
       VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(4)),
-      IRB.getInt32Ty(), IRB.getInt8Ty());
+      IRB.getInt32Ty(), IRB.getIntNTy(4));
   TsanVectorScatter[1] = M.getOrInsertFunction(
       SmallString<32>("__tsan_scatter_vector8"), Attr, IRB.getVoidTy(),
       VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(8)),
@@ -231,7 +231,7 @@ void ThreadSanitizer::initialize(Module &M, const TargetLibraryInfo &TLI) {
   TsanVectorGather[0] = M.getOrInsertFunction(
       SmallString<32>("__tsan_gather_vector4"), Attr, IRB.getVoidTy(),
       VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(4)),
-      IRB.getInt32Ty(), IRB.getInt8Ty());
+      IRB.getInt32Ty(), IRB.getIntNTy(4));
   TsanVectorGather[1] = M.getOrInsertFunction(
       SmallString<32>("__tsan_gather_vector8"), Attr, IRB.getVoidTy(),
       VectorType::get(IRB.getPtrTy(), ElementCount::getFixed(8)),
@@ -715,7 +715,8 @@ bool ThreadSanitizer::instrumentGatherOrScatter(Instruction *I,
   std::vector<Value *> Args{
       I->getOperand(IsScatter ? 1 : 0),
       llvm::ConstantInt::get(I->getContext(), llvm::APInt(32, BytesPerElement)),
-      IRB.CreateBitCast(I->getOperand(IsScatter ? 3 : 2), IRB.getInt8Ty())};
+      IRB.CreateBitCast(I->getOperand(IsScatter ? 3 : 2),
+                        IRB.getIntNTy(NumElements))};
   IRB.CreateCall(TsanVector[NumElements == 4 ? 0 : 1], Args);
 
   return true;

>From 9702a23c25fbc9ce2c08d77fa52bffd6d8ff61d7 Mon Sep 17 00:00:00 2001
From: "felix.tomski" <tomski at itc.rwth-aachen.de>
Date: Tue, 12 Dec 2023 08:30:05 +0100
Subject: [PATCH 14/14] Split TSan avx instrumentation tests into avx2 and
 avx512 testcases

---
 .../tsan_avx2_gatherscatter.ll                | 30 +++++++++++++++++++
 ...catter.ll => tsan_avx512_gatherscatter.ll} | 16 ----------
 2 files changed, 30 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/Instrumentation/ThreadSanitizer/tsan_avx2_gatherscatter.ll
 rename llvm/test/Instrumentation/ThreadSanitizer/{tsan_avx_gatherscatter.ll => tsan_avx512_gatherscatter.ll} (75%)

diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx2_gatherscatter.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx2_gatherscatter.ll
new file mode 100644
index 0000000000000..9cadf59634f5a
--- /dev/null
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx2_gatherscatter.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -passes='function(tsan),module(tsan-module)' -S | FileCheck %s
+
+; target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
+declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)
+
+define void @scatter_4_double_mask(<4 x double> %a, <4 x ptr> %p, <4 x i1> %m) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %a, <4 x ptr> %p, i32 8, <4 x i1> %m)
+  ret void
+}
+; CHECK: %1 = bitcast <4 x i1> %m to i4
+; CHECK-NEXT: call void @__tsan_scatter_vector4(<4 x ptr> %p, i32 8, i4 %1)
+
+define void @scatter_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
+entry:
+  tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %a, <4 x ptr> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
+  ret void
+}
+; CHECK: call void @__tsan_scatter_vector4(<4 x ptr> %p, i32 8, i4 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i4))
+
+define void @gather_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
+entry:
+  tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %p, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> %a)
+  ret void
+}
+; CHECK: call void @__tsan_gather_vector4(<4 x ptr> %p, i32 8, i4 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i4))
+
diff --git a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx512_gatherscatter.ll
similarity index 75%
rename from llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll
rename to llvm/test/Instrumentation/ThreadSanitizer/tsan_avx512_gatherscatter.ll
index 500fa4215c51d..a70c4c8373f66 100644
--- a/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx_gatherscatter.ll
+++ b/llvm/test/Instrumentation/ThreadSanitizer/tsan_avx512_gatherscatter.ll
@@ -7,8 +7,6 @@ declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32 immarg
 declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32 immarg, <8 x i1>)
 declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x double>)
 declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x float>)
-declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32 immarg, <4 x i1>)
-declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32 immarg, <4 x i1>, <4 x double>)
 
 define void @scatter_8_double_mask(<8 x double> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
 entry:
@@ -33,13 +31,6 @@ entry:
 }
 ; CHECK: call void @__tsan_scatter_vector8(<8 x ptr> %p, i32 8, i8 bitcast (<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> to i8))
 
-define void @scatter_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
-entry:
-  tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> %a, <4 x ptr> %p, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
-  ret void
-}
-; CHECK: call void @__tsan_scatter_vector4(<4 x ptr> %p, i32 8, i8 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i8))
-
 define void @gather_8_double_mask(<8 x double> %a, <8 x ptr> %p, <8 x i1> %m) sanitize_thread {
 entry:
   tail call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> %p, i32 8, <8 x i1> %m, <8 x double> %a)
@@ -62,10 +53,3 @@ entry:
   ret void
 }
 ; CHECK: call void @__tsan_gather_vector8(<8 x ptr> %p, i32 8, i8 bitcast (<8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true> to i8))
-
-define void @gather_4_double(<4 x double> %a, <4 x ptr> %p) sanitize_thread {
-entry:
-  tail call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> %p, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> %a)
-  ret void
-}
-; CHECK: call void @__tsan_gather_vector4(<4 x ptr> %p, i32 8, i8 bitcast (<4 x i1> <i1 true, i1 true, i1 true, i1 true> to i8))



More information about the Openmp-commits mailing list