[llvm] [msan] Handle AVX512 VCVTPS2PH (PR #154460)

Thurston Dang via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 20 09:18:00 PDT 2025


https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/154460

>From b2ff7746d1c504fb60a12216a04cdf0e409203a8 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 02:59:27 +0000
Subject: [PATCH 1/7] [msan] Handle AVX512 VCVTPS2PH

This extends maybeExtendVectorShadowWithZeros() from 556c8467d15a131552e3c84478d768bafd95d4e6 (https://github.com/llvm/llvm-project/pull/147377)
to handle AVX512 VCVTPS2PH.
---
 .../Instrumentation/MemorySanitizer.cpp       | 103 ++++++++++++++----
 .../MemorySanitizer/X86/avx512-intrinsics.ll  |  52 ++++-----
 .../X86/avx512vl-intrinsics.ll                |  93 ++++++++--------
 3 files changed, 151 insertions(+), 97 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 948e2c6e06843..13262c2c8b36f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3429,8 +3429,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return ShadowType;
   }
 
-  /// Doubles the length of a vector shadow (filled with zeros) if necessary to
-  /// match the length of the shadow for the instruction.
+  /// Doubles the length of a vector shadow (extending with zeros) if necessary
+  /// to match the length of the shadow for the instruction.
+  /// If scalar types of the vectors are different, it will use the type of the
+  /// input vector.
   /// This is more type-safe than CreateShadowCast().
   Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
     IRBuilder<> IRB(&I);
@@ -3440,10 +3442,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     Value *FullShadow = getCleanShadow(&I);
     assert(cast<FixedVectorType>(Shadow->getType())->getNumElements() <=
            cast<FixedVectorType>(FullShadow->getType())->getNumElements());
-    assert(cast<FixedVectorType>(Shadow->getType())->getScalarType() ==
-           cast<FixedVectorType>(FullShadow->getType())->getScalarType());
 
-    if (Shadow->getType() == FullShadow->getType()) {
+    if (cast<FixedVectorType>(Shadow->getType())->getNumElements() ==
+        cast<FixedVectorType>(FullShadow->getType())->getNumElements()) {
       FullShadow = Shadow;
     } else {
       // TODO: generalize beyond 2x?
@@ -4528,55 +4529,93 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     return isFixedFPVectorTy(V->getType());
   }
 
-  // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
-  //                           (<16 x float> a, <16 x i32> writethru, i16 mask,
-  //                           i32 rounding)
+  // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+  //                      (<16 x float> a, <16 x i32> writethru, i16 mask,
+  //                       i32 rounding)
+  //
+  // Inconveniently, some similar intrinsics have a different operand order:
+  //       <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+  //                      (<16 x float> a, i32 rounding, <16 x i16> writethru,
+  //                       i16 mask)
+  //
+  // If the return type has more elements than A, the excess elements are
+  // zeroed (and the corresponding shadow is initialized).
+  //       <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+  //                      (<4 x float> a, i32 rounding, <8 x i16> writethru,
+  //                       i8 mask)
   //
   // dst[i] = mask[i] ? convert(a[i]) : writethru[i]
   // dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
   //    where all_or_nothing(x) is fully uninitialized if x has any
   //    uninitialized bits
-  void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) {
+  void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
     IRBuilder<> IRB(&I);
 
     assert(I.arg_size() == 4);
     Value *A = I.getOperand(0);
-    Value *WriteThrough = I.getOperand(1);
-    Value *Mask = I.getOperand(2);
-    Value *RoundingMode = I.getOperand(3);
+    Value *WriteThrough;
+    Value *Mask;
+    Value *RoundingMode;
+    if (LastMask) {
+      WriteThrough = I.getOperand(2);
+      Mask = I.getOperand(3);
+      RoundingMode = I.getOperand(1);
+    } else {
+      WriteThrough = I.getOperand(1);
+      Mask = I.getOperand(2);
+      RoundingMode = I.getOperand(3);
+    }
 
     assert(isFixedFPVector(A));
     assert(isFixedIntVector(WriteThrough));
 
     unsigned ANumElements =
         cast<FixedVectorType>(A->getType())->getNumElements();
-    assert(ANumElements ==
-           cast<FixedVectorType>(WriteThrough->getType())->getNumElements());
+    unsigned WriteThruNumElements =
+        cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+    assert(ANumElements == WriteThruNumElements ||
+           ANumElements * 2 == WriteThruNumElements);
 
     assert(Mask->getType()->isIntegerTy());
-    assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+    unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
+    assert(ANumElements == MaskNumElements ||
+           ANumElements * 2 == MaskNumElements);
+
+    assert(WriteThruNumElements == MaskNumElements);
+
     insertCheckShadowOf(Mask, &I);
 
     assert(RoundingMode->getType()->isIntegerTy());
-    // Only four bits of the rounding mode are used, though it's very
+    // Only some bits of the rounding mode are used, though it's very
     // unusual to have uninitialized bits there (more commonly, it's a
     // constant).
     insertCheckShadowOf(RoundingMode, &I);
 
     assert(I.getType() == WriteThrough->getType());
 
+    Value *AShadow = getShadow(A);
+    AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);
+
+    if (ANumElements * 2 == MaskNumElements) {
+      // Ensure that the irrelevant bits of the mask are zero, hence selecting
+      // from the zeroed shadow instead of the writethrough's shadow.
+      Mask = IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements));
+      Mask = IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements));
+    }
+
     // Convert i16 mask to <16 x i1>
     Mask = IRB.CreateBitCast(
-        Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements));
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements));
 
-    Value *AShadow = getShadow(A);
-    /// For scalars:
-    /// Since they are converting from floating-point, the output is:
+    /// For floating-point to integer conversion, the output is:
     /// - fully uninitialized if *any* bit of the input is uninitialized
    /// - fully initialized if all bits of the input are initialized
     /// We apply the same principle on a per-element basis for vectors.
-    AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)),
-                             getShadowTy(A));
+    ///
+    /// We use the scalar width of the return type instead of A's.
+    AShadow = IRB.CreateSExt(
+        IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
+        getShadowTy(&I));
 
     Value *WriteThroughShadow = getShadow(WriteThrough);
     Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
@@ -5920,11 +5959,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                         /*trailingVerbatimArgs=*/1);
       break;
 
+    // Convert Packed Single Precision Floating-Point Values
+    //   to Packed Signed Doubleword Integer Values
+    //
+    // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+    //                (<16 x float>, <16 x i32>, i16, i32)
     case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
-      handleAVX512VectorConvertFPToInt(I);
+      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
       break;
     }
 
+    // Convert Single-Precision FP Value to 16-bit FP Value
+    // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+    //                (<16 x float>, i32, <16 x i16>, i16)
+    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+    //                (<4 x float>, i32, <8 x i16>, i8)
+    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
+    //                (<8 x float>, i32, <8 x i16>, i8)
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
+      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
+      break;
+
     // AVX512 PMOV: Packed MOV, with truncation
     // Precisely handled by applying the same intrinsic to the shadow
     case Intrinsic::x86_avx512_mask_pmov_dw_512:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 3c1af6781f0ed..eba0beb5bf6ac 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1903,50 +1903,46 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK:       6:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT:    unreachable
-; CHECK:       7:
+; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i16>
+; CHECK-NEXT:    [[TMP8:%.*]] = select <16 x i1> splat (i1 true), <16 x i16> [[TMP7]], <16 x i16> zeroinitializer
 ; CHECK-NEXT:    [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
-; CHECK-NEXT:    [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT:    [[TMP11:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
+; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK:       9:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK:       12:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       10:
-; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT:    [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
-; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT:    br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
 ; CHECK:       13:
+; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
+; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
+; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
+; CHECK-NEXT:    [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
+; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK:       18:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       14:
+; CHECK:       19:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
 ; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
-; CHECK:       15:
+; CHECK-NEXT:    br i1 [[_MSCMP8]], label [[TMP24:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK:       20:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       16:
+; CHECK:       21:
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
 ; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
-; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
+; CHECK-NEXT:    store <16 x i16> [[TMP8]], ptr [[TMP19]], align 32
 ; CHECK-NEXT:    store <16 x i16> [[RES1]], ptr [[DST]], align 32
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <16 x i16> [[TMP13]], [[TMP20]]
 ; CHECK-NEXT:    [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
-; CHECK-NEXT:    store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <16 x i16> [[RES]]
 ;
   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index db0c2e7ae9ed6..e22301174a0ca 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -7893,40 +7893,44 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK:       [[BB5]]:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT:    unreachable
-; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> bitcast (<1 x i8> splat (i8 15) to <8 x i1>), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP9:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT:    [[TMP10:%.*]] = zext i4 [[TMP9]] to i8
+; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne <8 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT:    [[TMP13:%.*]] = sext <8 x i1> [[TMP12]] to <8 x i16>
+; CHECK-NEXT:    [[TMP14:%.*]] = select <8 x i1> [[TMP11]], <8 x i16> [[TMP13]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
-; CHECK:       [[BB8]]:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
+; CHECK:       [[BB15]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB9]]:
+; CHECK:       [[BB16]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 10, <8 x i16> zeroinitializer, i8 [[MASK]])
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT:    [[TMP18:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT:    [[TMP19:%.*]] = zext i4 [[TMP18]] to i8
+; CHECK-NEXT:    [[TMP20:%.*]] = bitcast i8 [[TMP19]] to <8 x i1>
+; CHECK-NEXT:    [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT:    [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i16>
+; CHECK-NEXT:    [[TMP23:%.*]] = select <8 x i1> [[TMP20]], <8 x i16> [[TMP22]], <8 x i16> [[TMP3]]
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT:    br i1 [[_MSOR7]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK:       [[BB12]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK:       [[BB24]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB13]]:
+; CHECK:       [[BB25]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 11, <8 x i16> [[SRC]], i8 [[MASK]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP14]]
 ; CHECK-NEXT:    [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP23]], [[_MSPROP]]
 ; CHECK-NEXT:    [[RES:%.*]] = add <8 x i16> [[RES3]], [[RES0]]
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[RES]]
 ;
   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
@@ -7947,40 +7951,37 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
 ; CHECK-NEXT:    call void @llvm.donothing()
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK:       [[BB5]]:
-; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT:    unreachable
-; CHECK:       [[BB6]]:
+; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT:    [[TMP7:%.*]] = select <8 x i1> splat (i1 true), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT:    [[TMP7:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT:    [[_MSCMP1:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP10:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
+; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP9]], <8 x i16> [[TMP11]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT:    br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
-; CHECK:       [[BB8]]:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK:       [[BB11]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB9]]:
+; CHECK:       [[BB12]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 11, <8 x i16> zeroinitializer, i8 [[MASK]])
-; CHECK-NEXT:    [[TMP10:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT:    [[_MSCMP3:%.*]] = icmp ne i256 [[TMP10]], 0
-; CHECK-NEXT:    [[TMP11:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT:    [[_MSCMP4:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT:    [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT:    [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16>
+; CHECK-NEXT:    [[TMP19:%.*]] = select <8 x i1> [[TMP16]], <8 x i16> [[TMP18]], <8 x i16> [[TMP3]]
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT:    br i1 [[_MSOR7]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK:       [[BB12]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK:       [[BB17]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB13]]:
+; CHECK:       [[BB18]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 12, <8 x i16> [[SRC]], i8 [[MASK]])
+; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP12]]
 ; CHECK-NEXT:    [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
+; CHECK-NEXT:    [[_MSPROP1:%.*]] = or <8 x i16> [[TMP19]], [[_MSPROP]]
 ; CHECK-NEXT:    [[RES:%.*]] = add <8 x i16> [[RES3]], [[RES0]]
-; CHECK-NEXT:    store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT:    store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
 ; CHECK-NEXT:    ret <8 x i16> [[RES]]
 ;
   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)

>From e6d9db068b8895b06e0bebfafd5cee87425376c1 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 03:25:13 +0000
Subject: [PATCH 2/7] Update test comments

---
 .../MemorySanitizer/X86/avx512-intrinsics.ll  |  1 -
 .../X86/avx512vl-intrinsics.ll                | 69 ++++++++++++++++++-
 2 files changed, 68 insertions(+), 2 deletions(-)

diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index eba0beb5bf6ac..46fd39be9622b 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -24,7 +24,6 @@
 ; - llvm.x86.avx512.mask.rndscale.pd.512, llvm.x86.avx512.mask.rndscale.ps.512, llvm.x86.avx512.mask.rndscale.sd, llvm.x86.avx512.mask.rndscale.ss
 ; - llvm.x86.avx512.mask.scalef.pd.512, llvm.x86.avx512.mask.scalef.ps.512
 ; - llvm.x86.avx512.mask.sqrt.sd, llvm.x86.avx512.mask.sqrt.ss
-; - llvm.x86.avx512.mask.vcvtps2ph.512
 ; - llvm.x86.avx512.maskz.fixupimm.pd.512, llvm.x86.avx512.maskz.fixupimm.ps.512, llvm.x86.avx512.maskz.fixupimm.sd, llvm.x86.avx512.maskz.fixupimm.ss
 ; - llvm.x86.avx512.mul.pd.512, llvm.x86.avx512.mul.ps.512
 ; - llvm.x86.avx512.permvar.df.512, llvm.x86.avx512.permvar.sf.512
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index e22301174a0ca..25e1bedb477c5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -1,7 +1,74 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
 ; RUN: opt < %s -mattr=+avx512f -passes=msan -S | FileCheck %s
-
+;
 ; Forked from llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+;
+; Handled strictly:
+; - llvm.x86.avx512.mask.cmp.pd.128, llvm.x86.avx512.mask.cmp.pd.256
+; - llvm.x86.avx512.mask.cmp.ps.128, llvm.x86.avx512.mask.cmp.ps.256
+; - llvm.x86.avx512.mask.compress.v2f64, llvm.x86.avx512.mask.compress.v4f32, llvm.x86.avx512.mask.compress.v4f64, llvm.x86.avx512.mask.compress.v8f32
+; - llvm.x86.avx512.mask.compress.v2i64, llvm.x86.avx512.mask.compress.v4i32, llvm.x86.avx512.mask.compress.v4i64, llvm.x86.avx512.mask.compress.v8i32
+; - llvm.x86.avx512.mask.cvtpd2dq.128
+; - llvm.x86.avx512.mask.cvtpd2ps
+; - llvm.x86.avx512.mask.cvtpd2udq.128, llvm.x86.avx512.mask.cvtpd2udq.256
+; - llvm.x86.avx512.mask.cvtps2dq.128, llvm.x86.avx512.mask.cvtps2dq.256
+; - llvm.x86.avx512.mask.cvtps2udq.128, llvm.x86.avx512.mask.cvtps2udq.256
+; - llvm.x86.avx512.mask.cvttpd2dq.128
+; - llvm.x86.avx512.mask.cvttpd2udq.128, llvm.x86.avx512.mask.cvttpd2udq.256
+; - llvm.x86.avx512.mask.cvttps2udq.128, llvm.x86.avx512.mask.cvttps2udq.256
+; - llvm.x86.avx512.mask.expand.v2f64, llvm.x86.avx512.mask.expand.v4f32, llvm.x86.avx512.mask.expand.v4f64, llvm.x86.avx512.mask.expand.v8f32
+; - llvm.x86.avx512.mask.expand.v2i64, llvm.x86.avx512.mask.expand.v4i32, llvm.x86.avx512.mask.expand.v4i64, llvm.x86.avx512.mask.expand.v8i32
+; - llvm.x86.avx512.mask.fixupimm.pd.128, llvm.x86.avx512.mask.fixupimm.pd.256
+; - llvm.x86.avx512.mask.fixupimm.ps.128 llvm.x86.avx512.mask.fixupimm.ps.256
+; - llvm.x86.avx512.mask.getexp.pd.256
+; - llvm.x86.avx512.mask.getexp.ps.256
+; - llvm.x86.avx512.mask.getmant.pd.128, llvm.x86.avx512.mask.getmant.pd.256
+; - llvm.x86.avx512.mask.getmant.ps.128, llvm.x86.avx512.mask.getmant.ps.256
+; - llvm.x86.avx512.mask.pmov.db.128, llvm.x86.avx512.mask.pmov.db.256
+; - llvm.x86.avx512.mask.pmov.db.mem.128, llvm.x86.avx512.mask.pmov.db.mem.256
+; - llvm.x86.avx512.mask.pmov.dw.128, llvm.x86.avx512.mask.pmov.dw.256
+; - llvm.x86.avx512.mask.pmov.dw.mem.128, llvm.x86.avx512.mask.pmov.dw.mem.256
+; - llvm.x86.avx512.mask.pmov.qb.128, llvm.x86.avx512.mask.pmov.qb.256
+; - llvm.x86.avx512.mask.pmov.qb.mem.128, llvm.x86.avx512.mask.pmov.qb.mem.256
+; - llvm.x86.avx512.mask.pmov.qd.128
+; - llvm.x86.avx512.mask.pmov.qd.mem.128, llvm.x86.avx512.mask.pmov.qd.mem.256
+; - llvm.x86.avx512.mask.pmov.qw.128, llvm.x86.avx512.mask.pmov.qw.256
+; - llvm.x86.avx512.mask.pmov.qw.mem.128, llvm.x86.avx512.mask.pmov.qw.mem.256
+; - llvm.x86.avx512.mask.pmovs.db.128, llvm.x86.avx512.mask.pmovs.db.256
+; - llvm.x86.avx512.mask.pmovs.db.mem.128, llvm.x86.avx512.mask.pmovs.db.mem.256
+; - llvm.x86.avx512.mask.pmovs.dw.128, llvm.x86.avx512.mask.pmovs.dw.256
+; - llvm.x86.avx512.mask.pmovs.dw.mem.128, llvm.x86.avx512.mask.pmovs.dw.mem.256
+; - llvm.x86.avx512.mask.pmovs.qb.128, llvm.x86.avx512.mask.pmovs.qb.256
+; - llvm.x86.avx512.mask.pmovs.qb.mem.128, llvm.x86.avx512.mask.pmovs.qb.mem.256
+; - llvm.x86.avx512.mask.pmovs.qd.128, llvm.x86.avx512.mask.pmovs.qd.256
+; - llvm.x86.avx512.mask.pmovs.qd.mem.128, llvm.x86.avx512.mask.pmovs.qd.mem.256
+; - llvm.x86.avx512.mask.pmovs.qw.128, llvm.x86.avx512.mask.pmovs.qw.256
+; - llvm.x86.avx512.mask.pmovs.qw.mem.128, llvm.x86.avx512.mask.pmovs.qw.mem.256
+; - llvm.x86.avx512.mask.pmovus.db.128, llvm.x86.avx512.mask.pmovus.db.256
+; - llvm.x86.avx512.mask.pmovus.db.mem.128, llvm.x86.avx512.mask.pmovus.db.mem.256
+; - llvm.x86.avx512.mask.pmovus.dw.128, llvm.x86.avx512.mask.pmovus.dw.256
+; - llvm.x86.avx512.mask.pmovus.dw.mem.128, llvm.x86.avx512.mask.pmovus.dw.mem.256
+; - llvm.x86.avx512.mask.pmovus.qb.128, llvm.x86.avx512.mask.pmovus.qb.256
+; - llvm.x86.avx512.mask.pmovus.qb.mem.128, llvm.x86.avx512.mask.pmovus.qb.mem.256
+; - llvm.x86.avx512.mask.pmovus.qd.128, llvm.x86.avx512.mask.pmovus.qd.256
+; - llvm.x86.avx512.mask.pmovus.qd.mem.128, llvm.x86.avx512.mask.pmovus.qd.mem.256
+; - llvm.x86.avx512.mask.pmovus.qw.128, llvm.x86.avx512.mask.pmovus.qw.256
+; - llvm.x86.avx512.mask.pmovus.qw.mem.128, llvm.x86.avx512.mask.pmovus.qw.mem.256
+; - llvm.x86.avx512.mask.rndscale.pd.128, llvm.x86.avx512.mask.rndscale.pd.256
+; - llvm.x86.avx512.mask.rndscale.ps.128, llvm.x86.avx512.mask.rndscale.ps.256
+; - llvm.x86.avx512.mask.scalef.pd.128, llvm.x86.avx512.mask.scalef.pd.256
+; - llvm.x86.avx512.mask.scalef.ps.128, llvm.x86.avx512.mask.scalef.ps.256
+; - llvm.x86.avx512.maskz.fixupimm.pd.128, llvm.x86.avx512.maskz.fixupimm.pd.256
+; - llvm.x86.avx512.maskz.fixupimm.ps.128, llvm.x86.avx512.maskz.fixupimm.ps.256
+; - llvm.x86.avx512.permvar.df.256
+; - llvm.x86.avx512.pternlog.d.128, llvm.x86.avx512.pternlog.d.256
+; - llvm.x86.avx512.pternlog.q.128, llvm.x86.avx512.pternlog.q.256
+; - llvm.x86.avx512.rcp14.pd.128, llvm.x86.avx512.rcp14.pd.256
+; - llvm.x86.avx512.rcp14.ps.128, llvm.x86.avx512.rcp14.ps.256
+; - llvm.x86.avx512.rsqrt14.pd.128, llvm.x86.avx512.rsqrt14.pd.256
+; - llvm.x86.avx512.rsqrt14.ps.128, llvm.x86.avx512.rsqrt14.ps.256
+;
+; Handled heuristically: (none)
 
 target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"

>From 6bd7941614252513d84ed890796b32920086570b Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 03:26:31 +0000
Subject: [PATCH 3/7] Add maybe_unused

---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 13262c2c8b36f..eab7651ae0707 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4571,7 +4571,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     unsigned ANumElements =
         cast<FixedVectorType>(A->getType())->getNumElements();
-    unsigned WriteThruNumElements =
+    [[maybe_unused]] unsigned WriteThruNumElements =
         cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
     assert(ANumElements == WriteThruNumElements ||
            ANumElements * 2 == WriteThruNumElements);

>From a5d73636a0d38e356e7e305d300e89172a3894a3 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 05:13:34 +0000
Subject: [PATCH 4/7] Add note about mask uninitialized bits

---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index eab7651ae0707..98481ac7ecb42 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4583,6 +4583,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
 
     assert(WriteThruNumElements == MaskNumElements);
 
+    // Some bits of the mask may be unused, though it's unusual to have partly
+    // uninitialized bits.
     insertCheckShadowOf(Mask, &I);
 
     assert(RoundingMode->getType()->isIntegerTy());

>From 6ac0c664d6f433865c0ed00fba89ae0fb348df4c Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 05:24:54 +0000
Subject: [PATCH 5/7] Add labels

---
 .../Instrumentation/MemorySanitizer.cpp       | 14 +++++++----
 .../MemorySanitizer/X86/avx512-intrinsics.ll  | 22 ++++++++---------
 .../X86/avx512vl-intrinsics.ll                | 24 +++++++++----------
 3 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 98481ac7ecb42..9eca1fb35ddf4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4601,13 +4601,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     if (ANumElements * 2 == MaskNumElements) {
       // Ensure that the irrelevant bits of the mask are zero, hence selecting
       // from the zeroed shadow instead of the writethrough's shadow.
-      Mask = IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements));
-      Mask = IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements));
+      Mask =
+          IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements), "_ms_mask_trunc");
+      Mask =
+          IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements), "_ms_mask_zext");
     }
 
     // Convert i16 mask to <16 x i1>
     Mask = IRB.CreateBitCast(
-        Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements));
+        Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements),
+        "_ms_mask_bitcast");
 
     /// For floating-point to integer conversion, the output is:
     /// - fully uninitialized if *any* bit of the input is uninitialized
@@ -4617,10 +4620,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     /// We use the scalar width of the return type instead of A's.
     AShadow = IRB.CreateSExt(
         IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
-        getShadowTy(&I));
+        getShadowTy(&I), "_ms_a_shadow");
 
     Value *WriteThroughShadow = getShadow(WriteThrough);
-    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+    Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow,
+                                     "_ms_writethru_select");
 
     setShadow(&I, Shadow);
     setOriginForNaryOp(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 46fd39be9622b..a2f1d65e7cd41 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1911,29 +1911,29 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
 ; CHECK-NEXT:    [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK:       12:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK:       7:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       13:
+; CHECK:       8:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
 ; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
 ; CHECK-NEXT:    [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
 ; CHECK-NEXT:    [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
-; CHECK:       18:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK:       10:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       19:
+; CHECK:       11:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
 ; CHECK-NEXT:    [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP8]], label [[TMP24:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
-; CHECK:       20:
+; CHECK-NEXT:    br i1 [[_MSCMP8]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
+; CHECK:       12:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       21:
+; CHECK:       13:
 ; CHECK-NEXT:    [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
 ; CHECK-NEXT:    [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
 ; CHECK-NEXT:    [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
@@ -7446,10 +7446,10 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x
 ; CHECK-NEXT:    [[TMP6:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[TMP5]], <16 x i32> [[TMP2]]
 ; CHECK-NEXT:    [[_MSCMP:%.*]] = icmp ne i16 [[TMP10]], 0
 ; CHECK-NEXT:    br i1 [[_MSCMP]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK:       8:
+; CHECK:       5:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR10]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       9:
+; CHECK:       6:
 ; CHECK-NEXT:    [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2]], i32 10)
 ; CHECK-NEXT:    [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index 25e1bedb477c5..e2dc8cbdca968 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -7973,11 +7973,11 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
 ; CHECK-NEXT:    [[TMP13:%.*]] = sext <8 x i1> [[TMP12]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP14:%.*]] = select <8 x i1> [[TMP11]], <8 x i16> [[TMP13]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
-; CHECK:       [[BB15]]:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK:       [[BB8]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB16]]:
+; CHECK:       [[BB9]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 10, <8 x i16> zeroinitializer, i8 [[MASK]])
 ; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
 ; CHECK-NEXT:    [[TMP18:%.*]] = trunc i8 [[MASK]] to i4
@@ -7987,11 +7987,11 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
 ; CHECK-NEXT:    [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP23:%.*]] = select <8 x i1> [[TMP20]], <8 x i16> [[TMP22]], <8 x i16> [[TMP3]]
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK:       [[BB24]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
+; CHECK:       [[BB12]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB25]]:
+; CHECK:       [[BB13]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 11, <8 x i16> [[SRC]], i8 [[MASK]])
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP14]]
 ; CHECK-NEXT:    [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
@@ -8027,22 +8027,22 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
 ; CHECK-NEXT:    [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP12:%.*]] = select <8 x i1> [[TMP9]], <8 x i16> [[TMP11]], <8 x i16> zeroinitializer
 ; CHECK-NEXT:    [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
-; CHECK:       [[BB11]]:
+; CHECK-NEXT:    br i1 [[_MSCMP2]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK:       [[BB6]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB12]]:
+; CHECK:       [[BB7]]:
 ; CHECK-NEXT:    [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 11, <8 x i16> zeroinitializer, i8 [[MASK]])
 ; CHECK-NEXT:    [[TMP16:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
 ; CHECK-NEXT:    [[TMP17:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
 ; CHECK-NEXT:    [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16>
 ; CHECK-NEXT:    [[TMP19:%.*]] = select <8 x i1> [[TMP16]], <8 x i16> [[TMP18]], <8 x i16> [[TMP3]]
 ; CHECK-NEXT:    [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK:       [[BB17]]:
+; CHECK-NEXT:    br i1 [[_MSCMP6]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK:       [[BB9]]:
 ; CHECK-NEXT:    call void @__msan_warning_noreturn() #[[ATTR6]]
 ; CHECK-NEXT:    unreachable
-; CHECK:       [[BB18]]:
+; CHECK:       [[BB10]]:
 ; CHECK-NEXT:    [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 12, <8 x i16> [[SRC]], i8 [[MASK]])
 ; CHECK-NEXT:    [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP12]]
 ; CHECK-NEXT:    [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]

>From 6e8d1eafd27407b1dc0258259a475eb480c59f70 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 16:15:10 +0000
Subject: [PATCH 6/7] Update comments

---
 llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9eca1fb35ddf4..5a8b3e2dcfdb5 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5385,6 +5385,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    // Convert Packed Double Precision Floating-Point Values
+    //   to Packed Single PrecisionFloating-Point Values
     case Intrinsic::x86_sse2_cvtpd2ps:
     case Intrinsic::x86_sse2_cvtps2dq:
     case Intrinsic::x86_sse2_cvtpd2dq:
@@ -5399,6 +5401,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
     case Intrinsic::x86_avx512_psll_w_512:
     case Intrinsic::x86_avx512_psll_d_512:
     case Intrinsic::x86_avx512_psll_q_512:
@@ -5966,7 +5969,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
 
     // Convert Packed Single Precision Floating-Point Values
-    //   to Packed SignedDoubleword Integer Values
+    //   to Packed Signed Doubleword Integer Values
     //
     // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
     //                (<16 x float>, <16 x i32>, i16, i32)

>From 1b4441bf3b7e0d56f74969033deb31c8e26a6a5d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 16:17:39 +0000
Subject: [PATCH 7/7] Move convert handlers closer together, update comments

---
 .../Instrumentation/MemorySanitizer.cpp       | 52 ++++++++++---------
 1 file changed, 28 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5a8b3e2dcfdb5..d862c3a46e6cb 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5345,6 +5345,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     case Intrinsic::x86_sse_ldmxcsr:
       handleLdmxcsr(I);
       break;
+
+    // Convert Scalar Double Precision Floating-Point Value
+//   to Unsigned Doubleword Integer
+    // etc.
     case Intrinsic::x86_avx512_vcvtsd2usi64:
     case Intrinsic::x86_avx512_vcvtsd2usi32:
     case Intrinsic::x86_avx512_vcvtss2usi64:
@@ -5386,7 +5390,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
     }
 
     // Convert Packed Double Precision Floating-Point Values
-    //   to Packed Single PrecisionFloating-Point Values
+    //   to Packed Single Precision Floating-Point Values
     case Intrinsic::x86_sse2_cvtpd2ps:
     case Intrinsic::x86_sse2_cvtps2dq:
     case Intrinsic::x86_sse2_cvtpd2dq:
@@ -5401,6 +5405,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
       break;
     }
 
+    // Convert Packed Single Precision Floating-Point Values
+    //   to Packed Signed Doubleword Integer Values
+    //
+    // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+    //                (<16 x float>, <16 x i32>, i16, i32)
+    case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
+      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
+      break;
+    }
+
+    // Convert Single-Precision FP Value to 16-bit FP Value
+    // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+    //                (<16 x float>, i32, <16 x i16>, i16)
+    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+    //                (<4 x float>, i32, <8 x i16>, i8)
+    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
+    //                (<8 x float>, i32, <8 x i16>, i8)
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
+    case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
+      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
+      break;
+
     // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
     case Intrinsic::x86_avx512_psll_w_512:
     case Intrinsic::x86_avx512_psll_d_512:
@@ -5968,29 +5995,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
                                         /*trailingVerbatimArgs=*/1);
       break;
 
-    // Convert Packed Single Precision Floating-Point Values
-    //   to Packed Signed Doubleword Integer Values
-    //
-    // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
-    //                (<16 x float>, <16 x i32>, i16, i32)
-    case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
-      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
-      break;
-    }
-
-    // Convert Single-Precision FP Value to 16-bit FP Value
-    // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
-    //                (<16 x float>, i32, <16 x i16>, i16)
-    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
-    //                (<4 x float>, i32, <8 x i16>, i8)
-    //  <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
-    //                (<8 x float>, i32, <8 x i16>, i8)
-    case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
-    case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
-    case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
-      handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
-      break;
-
     // AVX512 PMOV: Packed MOV, with truncation
     // Precisely handled by applying the same intrinsic to the shadow
     case Intrinsic::x86_avx512_mask_pmov_dw_512:



More information about the llvm-commits mailing list