[llvm] [msan] Add handlers for AVX masked load/store intrinsics (PR #123857)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 22 10:19:35 PST 2025


================
@@ -4408,6 +4522,30 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       handleVtestIntrinsic(I);
       break;
 
+    case Intrinsic::x86_avx_maskstore_ps:
+    case Intrinsic::x86_avx_maskstore_pd:
+    case Intrinsic::x86_avx_maskstore_ps_256:
+    case Intrinsic::x86_avx_maskstore_pd_256:
+    case Intrinsic::x86_avx2_maskstore_d:
+    case Intrinsic::x86_avx2_maskstore_q:
+    case Intrinsic::x86_avx2_maskstore_d_256:
+    case Intrinsic::x86_avx2_maskstore_q_256: {
----------------
thurstond wrote:

That's an intriguing question, thanks!

LLVM automatically "upgrades" avx512.mask.load/store into the generic @llvm.masked.load/store intrinsics:
```
  } else if (Name.starts_with("avx512.mask.load")) {
    // "avx512.mask.loadu." or "avx512.mask.load."
    bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
    Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
                            CI->getArgOperand(2), Aligned);
```
(https://github.com/llvm/llvm-project/blob/main/llvm/lib/IR/AutoUpgrade.cpp#L2801)
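In IR terms, the upgrade looks roughly like this (a minimal sketch; the value names and alignment are illustrative, and the exact operands come from upgradeMaskedLoad):
```
; before upgrade (AVX-512 form: integer mask, passthru operand)
  %r = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(ptr %p, <16 x i32> %passthru, i16 %mask)

; after upgrade (generic form: the i16 mask is bitcast to a vector of i1)
  %m = bitcast i16 %mask to <16 x i1>
  %r = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr %p, i32 64, <16 x i1> %m, <16 x i32> %passthru)
```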

As a result, MemorySanitizer already handles avx512.mask.load/store via its existing, separate code path for instrumenting @llvm.masked.load/store (handleMaskedLoad/Store). For example, MemorySanitizer transforms:
```
define void @test_int_x86_avx512_mask_store_d_512(ptr %ptr1, ptr %ptr2, <16 x i32> %x1, i16 %x2) sanitize_memory {
  call void @llvm.x86.avx512.mask.store.d.512(ptr %ptr1, <16 x i32> %x1, i16 %x2)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(ptr, <16 x i32>, i16)
```
into:
```
; Function Attrs: sanitize_memory
define void @test_int_x86_avx512_mask_store_d_512(ptr %ptr1, ptr %ptr2, <16 x i32> %x1, i16 %x2) #0 {
  %1 = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
  %2 = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
  %3 = load i64, ptr @__msan_param_tls, align 8
  call void @llvm.donothing()
  %4 = bitcast i16 %1 to <16 x i1>
  %5 = bitcast i16 %x2 to <16 x i1>
  %6 = ptrtoint ptr %ptr1 to i64
  %7 = xor i64 %6, 87960930222080
  %8 = inttoptr i64 %7 to ptr
  call void @llvm.masked.store.v16i32.p0(<16 x i32> %2, ptr %8, i32 64, <16 x i1> %5)
  %_mscmp = icmp ne i64 %3, 0
  %9 = bitcast <16 x i1> %4 to i16
  %_mscmp1 = icmp ne i16 %9, 0
  %_msor = or i1 %_mscmp, %_mscmp1
  br i1 %_msor, label %10, label %11, !prof !1

10:                                               ; preds = %0
  call void @__msan_warning_noreturn() #4
  unreachable

11:                                               ; preds = %0
  call void @llvm.masked.store.v16i32.p0(<16 x i32> %x1, ptr %ptr1, i32 64, <16 x i1> %5)
  ret void
}
```
(Notice there are two @llvm.masked.store instructions: the first stores the shadow to shadow memory, the second is the original store.)

The "upgrade" process doesn't happen for AVX/AVX2, because the x86 backend efficiently convert from the @llvm.masked.load/store format (vector of booleans) back into the AVX masked load/store format (vector of ints) (according to X86InstCombineIntrinsic.cpp::simplifyX86MaskedLoad); as a result, MSan needs special handling of AVX/AVX2 masked loads/stores.

I'll add a test case in another pull request to keep track of AVX512 :-)


https://github.com/llvm/llvm-project/pull/123857

