[llvm] 4452cc4 - [VectorCombine] Don't vectorize scalar load under asan/hwasan/memtag/tsan
Fangrui Song via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 15 09:52:09 PDT 2020
Author: Fangrui Song
Date: 2020-09-15T09:47:21-07:00
New Revision: 4452cc4086aca1a424b2cd40da9fa120add522e7
URL: https://github.com/llvm/llvm-project/commit/4452cc4086aca1a424b2cd40da9fa120add522e7
DIFF: https://github.com/llvm/llvm-project/commit/4452cc4086aca1a424b2cd40da9fa120add522e7.diff
LOG: [VectorCombine] Don't vectorize scalar load under asan/hwasan/memtag/tsan
Similar to the tsan suppression in
`Utils/VNCoercion.cpp:getLoadLoadClobberFullWidthSize` (rL175034; load widening used by GVN),
the D81766 optimization should be suppressed under tsan due to potential
spurious data race reports:
struct A {
  int i;
  const short s; // the load cannot be vectorized because
  int modify;    // it overlaps with bytes being concurrently modified
  long pad1, pad2;
};
// __tsan_read16 does not know that some bytes are undef and that the access is safe
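For illustration, a minimal standalone sketch of the scenario (hypothetical code, not from the patch), built with -fsanitize=thread: the two threads access disjoint fields, so the source program is race free, yet a 16-byte load widened from `a.s` would also cover `a.modify`:

#include <thread>

struct A {
  int i;
  const short s;
  int modify;
  long pad1, pad2;
};

A a{0, 1, 0, 0, 0};

int main() {
  std::thread reader([] {
    // Scalar load of a.s; if widened to 16 bytes, tsan would report
    // __tsan_read16 racing with the concurrent write to a.modify.
    volatile short v = a.s;
    (void)v;
  });
  std::thread writer([] {
    a.modify = 42; // concurrent, but disjoint from a.s in the source
  });
  reader.join();
  writer.join();
}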
Similarly, under asan, users can mark memory regions with
`__asan_poison_memory_region`. A widened load can lead to a spurious
use-after-poison error. hwasan/memtag warrant the same suppression.
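As another minimal sketch (hypothetical code, not from the patch), built with -fsanitize=address; `__asan_poison_memory_region` and `__asan_unpoison_memory_region` are the public asan interface:

#include <sanitizer/asan_interface.h>
#include <cstdlib>

int main() {
  float *buf = static_cast<float *>(aligned_alloc(64, 16 * sizeof(float)));
  // Mark the second half of the allocation unaddressable.
  __asan_poison_memory_region(buf + 8, 8 * sizeof(float));
  // A scalar load of buf[7] is valid; widening it to a <4 x float>
  // load starting at &buf[7] would read into the poisoned half and
  // report a use-after-poison.
  volatile float f = buf[7];
  __asan_unpoison_memory_region(buf + 8, 8 * sizeof(float));
  free(buf);
  return (int)f;
}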
`mustSuppressSpeculation` covers asan/hwasan/tsan but not memtag, so
memtag needs an explicit check in `vectorizeLoadInsert`.
Note: the memtag suppression could be relaxed if the load is aligned to
its granule (usually 16 bytes), but that is out of scope for this patch.
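For illustration only, a hypothetical sketch of what such a relaxation might check; `widenedLoadStaysInGranule`, its parameters, and the default granule size are invented for this example and are not part of the patch:

#include "llvm/IR/Instructions.h"
using namespace llvm;

// Hypothetical helper: if the scalar load is granule-aligned and the
// widened access does not extend past one granule, every widened byte
// carries the same memory tag as the original scalar access.
static bool widenedLoadStaysInGranule(const LoadInst &Load,
                                      uint64_t WidenedBytes,
                                      uint64_t GranuleSize = 16) {
  return Load.getAlign().value() >= GranuleSize &&
         WidenedBytes <= GranuleSize;
}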
Reviewed By: spatel, vitalybuka
Differential Revision: https://reviews.llvm.org/D87538
Added:
Modified:
llvm/lib/Transforms/Vectorize/VectorCombine.cpp
llvm/test/Transforms/VectorCombine/X86/load.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 29e9b92040d4..829f640941ac 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -98,7 +98,12 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) {
return false;
auto *Load = dyn_cast<LoadInst>(Scalar);
Type *ScalarTy = Scalar->getType();
- if (!Load || !Load->isSimple())
+ // Do not vectorize scalar load (widening) if atomic/volatile or under
+ // asan/hwasan/memtag/tsan. The widened load may load data from dirty regions
+ // or create data races non-existent in the source.
+ if (!Load || !Load->isSimple() ||
+ Load->getFunction()->hasFnAttribute(Attribute::SanitizeMemTag) ||
+ mustSuppressSpeculation(*Load))
return false;
auto *Ty = dyn_cast<FixedVectorType>(I.getType());
if (!Ty)
diff --git a/llvm/test/Transforms/VectorCombine/X86/load.ll b/llvm/test/Transforms/VectorCombine/X86/load.ll
index f0c5b6ef7ad8..9ea027940ad3 100644
--- a/llvm/test/Transforms/VectorCombine/X86/load.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/load.ll
@@ -292,6 +292,66 @@ define <8 x i16> @gep10_load_i16_insert_v8i16(<8 x i16>* align 16 dereferenceabl
ret <8 x i16> %r
}
+; Negative test - disable under asan because widened load can cause spurious
+; use-after-poison issues when __asan_poison_memory_region is used.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_asan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_address {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_asan(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT: ret <8 x i16> [[R]]
+;
+ %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+ %s = load i16, i16* %gep, align 16
+ %r = insertelement <8 x i16> undef, i16 %s, i64 0
+ ret <8 x i16> %r
+}
+
+; hwasan and memtag should be similarly suppressed.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_hwasan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_hwaddress {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_hwasan(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT: ret <8 x i16> [[R]]
+;
+ %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+ %s = load i16, i16* %gep, align 16
+ %r = insertelement <8 x i16> undef, i16 %s, i64 0
+ ret <8 x i16> %r
+}
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_memtag(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_memtag {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_memtag(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT: ret <8 x i16> [[R]]
+;
+ %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+ %s = load i16, i16* %gep, align 16
+ %r = insertelement <8 x i16> undef, i16 %s, i64 0
+ ret <8 x i16> %r
+}
+
+; Negative test - disable under tsan because widened load may overlap bytes
+; being concurrently modified. tsan does not know that some bytes are undef.
+
+define <8 x i16> @gep10_load_i16_insert_v8i16_tsan(<8 x i16>* align 16 dereferenceable(32) %p) sanitize_thread {
+; CHECK-LABEL: @gep10_load_i16_insert_v8i16_tsan(
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds <8 x i16>, <8 x i16>* [[P:%.*]], i64 1, i64 0
+; CHECK-NEXT: [[S:%.*]] = load i16, i16* [[GEP]], align 16
+; CHECK-NEXT: [[R:%.*]] = insertelement <8 x i16> undef, i16 [[S]], i64 0
+; CHECK-NEXT: ret <8 x i16> [[R]]
+;
+ %gep = getelementptr inbounds <8 x i16>, <8 x i16>* %p, i64 1, i64 0
+ %s = load i16, i16* %gep, align 16
+ %r = insertelement <8 x i16> undef, i16 %s, i64 0
+ ret <8 x i16> %r
+}
+
; Negative test - can't safely load the offset vector, but could load+shuffle.
define <8 x i16> @gep10_load_i16_insert_v8i16_deref(<8 x i16>* align 16 dereferenceable(31) %p) {
@@ -393,3 +453,16 @@ define <2 x float> @load_f32_insert_v2f32(float* align 16 dereferenceable(16) %p
%r = insertelement <2 x float> undef, float %s, i32 0
ret <2 x float> %r
}
+
+; Negative test - suppress load widening for asan/hwasan/memtag/tsan.
+
+define <2 x float> @load_f32_insert_v2f32_asan(float* align 16 dereferenceable(16) %p) sanitize_address {
+; CHECK-LABEL: @load_f32_insert_v2f32_asan(
+; CHECK-NEXT: [[S:%.*]] = load float, float* [[P:%.*]], align 4
+; CHECK-NEXT: [[R:%.*]] = insertelement <2 x float> undef, float [[S]], i32 0
+; CHECK-NEXT: ret <2 x float> [[R]]
+;
+ %s = load float, float* %p, align 4
+ %r = insertelement <2 x float> undef, float %s, i32 0
+ ret <2 x float> %r
+}