[llvm] [msan] Handle AVX512 VCVTPS2PH (PR #154460)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 20 09:18:00 PDT 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/154460
>From b2ff7746d1c504fb60a12216a04cdf0e409203a8 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 02:59:27 +0000
Subject: [PATCH 1/7] [msan] Handle AVX512 VCVTPS2PH
This extends maybeExtendVectorShadowWithZeros() from 556c8467d15a131552e3c84478d768bafd95d4e6 (https://github.com/llvm/llvm-project/pull/147377)
to handle AVX512 VCVTPS2PH.
---
.../Instrumentation/MemorySanitizer.cpp | 103 ++++++++++++++----
.../MemorySanitizer/X86/avx512-intrinsics.ll | 52 ++++-----
.../X86/avx512vl-intrinsics.ll | 93 ++++++++--------
3 files changed, 151 insertions(+), 97 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 948e2c6e06843..13262c2c8b36f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3429,8 +3429,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ShadowType;
}
- /// Doubles the length of a vector shadow (filled with zeros) if necessary to
- /// match the length of the shadow for the instruction.
+ /// Doubles the length of a vector shadow (extending with zeros) if necessary
+ /// to match the length of the shadow for the instruction.
+ /// If the scalar types of the two vectors differ, the scalar type of the
+ /// input vector is used for the extended shadow.
/// This is more type-safe than CreateShadowCast().
Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
IRBuilder<> IRB(&I);
@@ -3440,10 +3442,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *FullShadow = getCleanShadow(&I);
assert(cast<FixedVectorType>(Shadow->getType())->getNumElements() <=
cast<FixedVectorType>(FullShadow->getType())->getNumElements());
- assert(cast<FixedVectorType>(Shadow->getType())->getScalarType() ==
- cast<FixedVectorType>(FullShadow->getType())->getScalarType());
- if (Shadow->getType() == FullShadow->getType()) {
+ if (cast<FixedVectorType>(Shadow->getType())->getNumElements() ==
+ cast<FixedVectorType>(FullShadow->getType())->getNumElements()) {
FullShadow = Shadow;
} else {
// TODO: generalize beyond 2x?
@@ -4528,55 +4529,93 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return isFixedFPVectorTy(V->getType());
}
- // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
- // (<16 x float> a, <16 x i32> writethru, i16 mask,
- // i32 rounding)
+ // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float> a, <16 x i32> writethru, i16 mask,
+ // i32 rounding)
+ //
+ // Inconveniently, some similar intrinsics have a different operand order:
+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+ // (<16 x float> a, i32 rounding, <16 x i16> writethru,
+ // i16 mask)
+ //
+ // If the return type has more elements than A, the excess elements are
+ // zeroed (and the corresponding shadow is initialized).
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+ // (<4 x float> a, i32 rounding, <8 x i16> writethru,
+ // i8 mask)
//
// dst[i] = mask[i] ? convert(a[i]) : writethru[i]
// dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
// where all_or_nothing(x) is fully uninitialized if x has any
// uninitialized bits
- void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) {
+ void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
IRBuilder<> IRB(&I);
assert(I.arg_size() == 4);
Value *A = I.getOperand(0);
- Value *WriteThrough = I.getOperand(1);
- Value *Mask = I.getOperand(2);
- Value *RoundingMode = I.getOperand(3);
+ Value *WriteThrough;
+ Value *Mask;
+ Value *RoundingMode;
+ if (LastMask) {
+ WriteThrough = I.getOperand(2);
+ Mask = I.getOperand(3);
+ RoundingMode = I.getOperand(1);
+ } else {
+ WriteThrough = I.getOperand(1);
+ Mask = I.getOperand(2);
+ RoundingMode = I.getOperand(3);
+ }
assert(isFixedFPVector(A));
assert(isFixedIntVector(WriteThrough));
unsigned ANumElements =
cast<FixedVectorType>(A->getType())->getNumElements();
- assert(ANumElements ==
- cast<FixedVectorType>(WriteThrough->getType())->getNumElements());
+ unsigned WriteThruNumElements =
+ cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+ assert(ANumElements == WriteThruNumElements ||
+ ANumElements * 2 == WriteThruNumElements);
assert(Mask->getType()->isIntegerTy());
- assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+ unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
+ assert(ANumElements == MaskNumElements ||
+ ANumElements * 2 == MaskNumElements);
+
+ assert(WriteThruNumElements == MaskNumElements);
+
insertCheckShadowOf(Mask, &I);
assert(RoundingMode->getType()->isIntegerTy());
- // Only four bits of the rounding mode are used, though it's very
+ // Only some bits of the rounding mode are used, though it's very
// unusual to have uninitialized bits there (more commonly, it's a
// constant).
insertCheckShadowOf(RoundingMode, &I);
assert(I.getType() == WriteThrough->getType());
+ Value *AShadow = getShadow(A);
+ AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);
+
+ if (ANumElements * 2 == MaskNumElements) {
+ // Ensure that the irrelevant bits of the mask are zero, hence selecting
+ // from the zeroed shadow instead of the writethrough's shadow.
+ Mask = IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements));
+ Mask = IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements));
+ }
+
// Convert i16 mask to <16 x i1>
Mask = IRB.CreateBitCast(
- Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements));
+ Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements));
- Value *AShadow = getShadow(A);
- /// For scalars:
- /// Since they are converting from floating-point, the output is:
+ /// For floating-point to integer conversion, the output is:
/// - fully uninitialized if *any* bit of the input is uninitialized
/// - fully initialized if all bits of the input are initialized
/// We apply the same principle on a per-element basis for vectors.
- AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)),
- getShadowTy(A));
+ ///
+ /// We use the scalar width of the return type instead of A's.
+ AShadow = IRB.CreateSExt(
+ IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
+ getShadowTy(&I));
Value *WriteThroughShadow = getShadow(WriteThrough);
Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
@@ -5920,11 +5959,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*trailingVerbatimArgs=*/1);
break;
+ // Convert Packed Single Precision Floating-Point Values
+ // to Packed Signed Doubleword Integer Values
+ //
+ // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float>, <16 x i32>, i16, i32)
case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
- handleAVX512VectorConvertFPToInt(I);
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
break;
}
+ // Convert Single-Precision FP Value to 16-bit FP Value
+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+ // (<16 x float>, i32, <16 x i16>, i16)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+ // (<4 x float>, i32, <8 x i16>, i8)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
+ // (<8 x float>, i32, <8 x i16>, i8)
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
+ break;
+
// AVX512 PMOV: Packed MOV, with truncation
// Precisely handled by applying the same intrinsic to the shadow
case Intrinsic::x86_avx512_mask_pmov_dw_512:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 3c1af6781f0ed..eba0beb5bf6ac 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1903,50 +1903,46 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> splat (i1 true), <16 x i16> [[TMP7]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 12:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK: 13:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
+; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 18:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 14:
+; CHECK: 19:
; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
-; CHECK: 15:
+; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP24:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 20:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 16:
+; CHECK: 21:
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
+; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP19]], align 32
; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP13]], [[TMP20]]
; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index db0c2e7ae9ed6..e22301174a0ca 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -7893,40 +7893,44 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK: [[BB5]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> bitcast (<1 x i8> splat (i8 15) to <8 x i1>), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT: [[TMP10:%.*]] = zext i4 [[TMP9]] to i8
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <8 x i1> [[TMP12]] to <8 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP11]], <8 x i16> [[TMP13]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
-; CHECK: [[BB8]]:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
+; CHECK: [[BB15]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB9]]:
+; CHECK: [[BB16]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 10, <8 x i16> zeroinitializer, i8 [[MASK]])
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP18:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT: [[TMP19:%.*]] = zext i4 [[TMP18]] to i8
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP19]] to <8 x i1>
+; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i16>
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP20]], <8 x i16> [[TMP22]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB25]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 11, <8 x i16> [[SRC]], i8 [[MASK]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP14]]
; CHECK-NEXT: [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP23]], [[_MSPROP]]
; CHECK-NEXT: [[RES:%.*]] = add <8 x i16> [[RES3]], [[RES0]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
@@ -7947,40 +7951,37 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK: [[BB5]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> splat (i1 true), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP9]], <8 x i16> [[TMP11]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
-; CHECK: [[BB8]]:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB9]]:
+; CHECK: [[BB12]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 11, <8 x i16> zeroinitializer, i8 [[MASK]])
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16>
+; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP16]], <8 x i16> [[TMP18]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
+; CHECK: [[BB17]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB18]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 12, <8 x i16> [[SRC]], i8 [[MASK]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP12]]
; CHECK-NEXT: [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP19]], [[_MSPROP]]
; CHECK-NEXT: [[RES:%.*]] = add <8 x i16> [[RES3]], [[RES0]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
>From e6d9db068b8895b06e0bebfafd5cee87425376c1 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 03:25:13 +0000
Subject: [PATCH 2/7] Update test comments
---
.../MemorySanitizer/X86/avx512-intrinsics.ll | 1 -
.../X86/avx512vl-intrinsics.ll | 69 ++++++++++++++++++-
2 files changed, 68 insertions(+), 2 deletions(-)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index eba0beb5bf6ac..46fd39be9622b 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -24,7 +24,6 @@
; - llvm.x86.avx512.mask.rndscale.pd.512, llvm.x86.avx512.mask.rndscale.ps.512, llvm.x86.avx512.mask.rndscale.sd, llvm.x86.avx512.mask.rndscale.ss
; - llvm.x86.avx512.mask.scalef.pd.512, llvm.x86.avx512.mask.scalef.ps.512
; - llvm.x86.avx512.mask.sqrt.sd, llvm.x86.avx512.mask.sqrt.ss
-; - llvm.x86.avx512.mask.vcvtps2ph.512
; - llvm.x86.avx512.maskz.fixupimm.pd.512, llvm.x86.avx512.maskz.fixupimm.ps.512, llvm.x86.avx512.maskz.fixupimm.sd, llvm.x86.avx512.maskz.fixupimm.ss
; - llvm.x86.avx512.mul.pd.512, llvm.x86.avx512.mul.ps.512
; - llvm.x86.avx512.permvar.df.512, llvm.x86.avx512.permvar.sf.512
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index e22301174a0ca..25e1bedb477c5 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -1,7 +1,74 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -mattr=+avx512f -passes=msan -S | FileCheck %s
-
+;
; Forked from llvm/test/CodeGen/X86/avx512vl-intrinsics.ll
+;
+; Handled strictly:
+; - llvm.x86.avx512.mask.cmp.pd.128, llvm.x86.avx512.mask.cmp.pd.256
+; - llvm.x86.avx512.mask.cmp.ps.128, llvm.x86.avx512.mask.cmp.ps.256
+; - llvm.x86.avx512.mask.compress.v2f64, llvm.x86.avx512.mask.compress.v4f32, llvm.x86.avx512.mask.compress.v4f64, llvm.x86.avx512.mask.compress.v8f32
+; - llvm.x86.avx512.mask.compress.v2i64, llvm.x86.avx512.mask.compress.v4i32, llvm.x86.avx512.mask.compress.v4i64, llvm.x86.avx512.mask.compress.v8i32
+; - llvm.x86.avx512.mask.cvtpd2dq.128
+; - llvm.x86.avx512.mask.cvtpd2ps
+; - llvm.x86.avx512.mask.cvtpd2udq.128, llvm.x86.avx512.mask.cvtpd2udq.256
+; - llvm.x86.avx512.mask.cvtps2dq.128, llvm.x86.avx512.mask.cvtps2dq.256
+; - llvm.x86.avx512.mask.cvtps2udq.128, llvm.x86.avx512.mask.cvtps2udq.256
+; - llvm.x86.avx512.mask.cvttpd2dq.128
+; - llvm.x86.avx512.mask.cvttpd2udq.128, llvm.x86.avx512.mask.cvttpd2udq.256
+; - llvm.x86.avx512.mask.cvttps2udq.128, llvm.x86.avx512.mask.cvttps2udq.256
+; - llvm.x86.avx512.mask.expand.v2f64, llvm.x86.avx512.mask.expand.v4f32, llvm.x86.avx512.mask.expand.v4f64, llvm.x86.avx512.mask.expand.v8f32
+; - llvm.x86.avx512.mask.expand.v2i64, llvm.x86.avx512.mask.expand.v4i32, llvm.x86.avx512.mask.expand.v4i64, llvm.x86.avx512.mask.expand.v8i32
+; - llvm.x86.avx512.mask.fixupimm.pd.128, llvm.x86.avx512.mask.fixupimm.pd.256
+; - llvm.x86.avx512.mask.fixupimm.ps.128 llvm.x86.avx512.mask.fixupimm.ps.256
+; - llvm.x86.avx512.mask.getexp.pd.256
+; - llvm.x86.avx512.mask.getexp.ps.256
+; - llvm.x86.avx512.mask.getmant.pd.128, llvm.x86.avx512.mask.getmant.pd.256
+; - llvm.x86.avx512.mask.getmant.ps.128, llvm.x86.avx512.mask.getmant.ps.256
+; - llvm.x86.avx512.mask.pmov.db.128, llvm.x86.avx512.mask.pmov.db.256
+; - llvm.x86.avx512.mask.pmov.db.mem.128, llvm.x86.avx512.mask.pmov.db.mem.256
+; - llvm.x86.avx512.mask.pmov.dw.128, llvm.x86.avx512.mask.pmov.dw.256
+; - llvm.x86.avx512.mask.pmov.dw.mem.128, llvm.x86.avx512.mask.pmov.dw.mem.256
+; - llvm.x86.avx512.mask.pmov.qb.128, llvm.x86.avx512.mask.pmov.qb.256
+; - llvm.x86.avx512.mask.pmov.qb.mem.128, llvm.x86.avx512.mask.pmov.qb.mem.256
+; - llvm.x86.avx512.mask.pmov.qd.128
+; - llvm.x86.avx512.mask.pmov.qd.mem.128, llvm.x86.avx512.mask.pmov.qd.mem.256
+; - llvm.x86.avx512.mask.pmov.qw.128, llvm.x86.avx512.mask.pmov.qw.256
+; - llvm.x86.avx512.mask.pmov.qw.mem.128, llvm.x86.avx512.mask.pmov.qw.mem.256
+; - llvm.x86.avx512.mask.pmovs.db.128, llvm.x86.avx512.mask.pmovs.db.256
+; - llvm.x86.avx512.mask.pmovs.db.mem.128, llvm.x86.avx512.mask.pmovs.db.mem.256
+; - llvm.x86.avx512.mask.pmovs.dw.128, llvm.x86.avx512.mask.pmovs.dw.256
+; - llvm.x86.avx512.mask.pmovs.dw.mem.128, llvm.x86.avx512.mask.pmovs.dw.mem.256
+; - llvm.x86.avx512.mask.pmovs.qb.128, llvm.x86.avx512.mask.pmovs.qb.256
+; - llvm.x86.avx512.mask.pmovs.qb.mem.128, llvm.x86.avx512.mask.pmovs.qb.mem.256
+; - llvm.x86.avx512.mask.pmovs.qd.128, llvm.x86.avx512.mask.pmovs.qd.256
+; - llvm.x86.avx512.mask.pmovs.qd.mem.128, llvm.x86.avx512.mask.pmovs.qd.mem.256
+; - llvm.x86.avx512.mask.pmovs.qw.128, llvm.x86.avx512.mask.pmovs.qw.256
+; - llvm.x86.avx512.mask.pmovs.qw.mem.128, llvm.x86.avx512.mask.pmovs.qw.mem.256
+; - llvm.x86.avx512.mask.pmovus.db.128, llvm.x86.avx512.mask.pmovus.db.256
+; - llvm.x86.avx512.mask.pmovus.db.mem.128, llvm.x86.avx512.mask.pmovus.db.mem.256
+; - llvm.x86.avx512.mask.pmovus.dw.128, llvm.x86.avx512.mask.pmovus.dw.256
+; - llvm.x86.avx512.mask.pmovus.dw.mem.128, llvm.x86.avx512.mask.pmovus.dw.mem.256
+; - llvm.x86.avx512.mask.pmovus.qb.128, llvm.x86.avx512.mask.pmovus.qb.256
+; - llvm.x86.avx512.mask.pmovus.qb.mem.128, llvm.x86.avx512.mask.pmovus.qb.mem.256
+; - llvm.x86.avx512.mask.pmovus.qd.128, llvm.x86.avx512.mask.pmovus.qd.256
+; - llvm.x86.avx512.mask.pmovus.qd.mem.128, llvm.x86.avx512.mask.pmovus.qd.mem.256
+; - llvm.x86.avx512.mask.pmovus.qw.128, llvm.x86.avx512.mask.pmovus.qw.256
+; - llvm.x86.avx512.mask.pmovus.qw.mem.128, llvm.x86.avx512.mask.pmovus.qw.mem.256
+; - llvm.x86.avx512.mask.rndscale.pd.128, llvm.x86.avx512.mask.rndscale.pd.256
+; - llvm.x86.avx512.mask.rndscale.ps.128, llvm.x86.avx512.mask.rndscale.ps.256
+; - llvm.x86.avx512.mask.scalef.pd.128, llvm.x86.avx512.mask.scalef.pd.256
+; - llvm.x86.avx512.mask.scalef.ps.128, llvm.x86.avx512.mask.scalef.ps.256
+; - llvm.x86.avx512.maskz.fixupimm.pd.128, llvm.x86.avx512.maskz.fixupimm.pd.256
+; - llvm.x86.avx512.maskz.fixupimm.ps.128, llvm.x86.avx512.maskz.fixupimm.ps.256
+; - llvm.x86.avx512.permvar.df.256
+; - llvm.x86.avx512.pternlog.d.128, llvm.x86.avx512.pternlog.d.256
+; - llvm.x86.avx512.pternlog.q.128, llvm.x86.avx512.pternlog.q.256
+; - llvm.x86.avx512.rcp14.pd.128, llvm.x86.avx512.rcp14.pd.256
+; - llvm.x86.avx512.rcp14.ps.128, llvm.x86.avx512.rcp14.ps.256
+; - llvm.x86.avx512.rsqrt14.pd.128, llvm.x86.avx512.rsqrt14.pd.256
+; - llvm.x86.avx512.rsqrt14.ps.128, llvm.x86.avx512.rsqrt14.ps.256
+;
+; Handled heuristically: (none)
target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
>From 6bd7941614252513d84ed890796b32920086570b Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 03:26:31 +0000
Subject: [PATCH 3/7] Add maybe_unused
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 13262c2c8b36f..eab7651ae0707 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4571,7 +4571,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
unsigned ANumElements =
cast<FixedVectorType>(A->getType())->getNumElements();
- unsigned WriteThruNumElements =
+ [[maybe_unused]] unsigned WriteThruNumElements =
cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
assert(ANumElements == WriteThruNumElements ||
ANumElements * 2 == WriteThruNumElements);
>From a5d73636a0d38e356e7e305d300e89172a3894a3 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 05:13:34 +0000
Subject: [PATCH 4/7] Add note about mask uninitialized bits
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 2 ++
1 file changed, 2 insertions(+)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index eab7651ae0707..98481ac7ecb42 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4583,6 +4583,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
assert(WriteThruNumElements == MaskNumElements);
+ // Some bits of the mask may be unused, though it's unusual to have partly
+ // uninitialized bits.
insertCheckShadowOf(Mask, &I);
assert(RoundingMode->getType()->isIntegerTy());
>From 6ac0c664d6f433865c0ed00fba89ae0fb348df4c Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 05:24:54 +0000
Subject: [PATCH 5/7] Add labels
---
.../Instrumentation/MemorySanitizer.cpp | 14 +++++++----
.../MemorySanitizer/X86/avx512-intrinsics.ll | 22 ++++++++---------
.../X86/avx512vl-intrinsics.ll | 24 +++++++++----------
3 files changed, 32 insertions(+), 28 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 98481ac7ecb42..9eca1fb35ddf4 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4601,13 +4601,16 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
if (ANumElements * 2 == MaskNumElements) {
// Ensure that the irrelevant bits of the mask are zero, hence selecting
// from the zeroed shadow instead of the writethrough's shadow.
- Mask = IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements));
- Mask = IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements));
+ Mask =
+ IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements), "_ms_mask_trunc");
+ Mask =
+ IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements), "_ms_mask_zext");
}
// Convert i16 mask to <16 x i1>
Mask = IRB.CreateBitCast(
- Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements));
+ Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements),
+ "_ms_mask_bitcast");
/// For floating-point to integer conversion, the output is:
/// - fully uninitialized if *any* bit of the input is uninitialized
@@ -4617,10 +4620,11 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// We use the scalar width of the return type instead of A's.
AShadow = IRB.CreateSExt(
IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
- getShadowTy(&I));
+ getShadowTy(&I), "_ms_a_shadow");
Value *WriteThroughShadow = getShadow(WriteThrough);
- Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
+ Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow,
+ "_ms_writethru_select");
setShadow(&I, Shadow);
setOriginForNaryOp(I);
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 46fd39be9622b..a2f1d65e7cd41 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1911,29 +1911,29 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 12:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK: 8:
; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
; CHECK-NEXT: [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
-; CHECK: 18:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 19:
+; CHECK: 11:
; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP24:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
-; CHECK: 20:
+; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
+; CHECK: 12:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 21:
+; CHECK: 13:
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
@@ -7446,10 +7446,10 @@ define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x
; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP3]], <16 x i32> [[TMP5]], <16 x i32> [[TMP2]]
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP10]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 8:
+; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 9:
+; CHECK: 6:
; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2]], i32 10)
; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index 25e1bedb477c5..e2dc8cbdca968 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -7973,11 +7973,11 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP13:%.*]] = sext <8 x i1> [[TMP12]] to <8 x i16>
; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP11]], <8 x i16> [[TMP13]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
-; CHECK: [[BB15]]:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB16]]:
+; CHECK: [[BB9]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 10, <8 x i16> zeroinitializer, i8 [[MASK]])
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; CHECK-NEXT: [[TMP18:%.*]] = trunc i8 [[MASK]] to i4
@@ -7987,11 +7987,11 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i16>
; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP20]], <8 x i16> [[TMP22]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
-; CHECK: [[BB24]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
+; CHECK: [[BB12]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB25]]:
+; CHECK: [[BB13]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 11, <8 x i16> [[SRC]], i8 [[MASK]])
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP14]]
; CHECK-NEXT: [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
@@ -8027,22 +8027,22 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP9]], <8 x i16> [[TMP11]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
-; CHECK: [[BB11]]:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB12]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 11, <8 x i16> zeroinitializer, i8 [[MASK]])
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
; CHECK-NEXT: [[TMP17:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT: [[TMP18:%.*]] = sext <8 x i1> [[TMP17]] to <8 x i16>
; CHECK-NEXT: [[TMP19:%.*]] = select <8 x i1> [[TMP16]], <8 x i16> [[TMP18]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB10]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 12, <8 x i16> [[SRC]], i8 [[MASK]])
; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP12]]
; CHECK-NEXT: [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
>From 6e8d1eafd27407b1dc0258259a475eb480c59f70 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 16:15:10 +0000
Subject: [PATCH 6/7] Update comments
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9eca1fb35ddf4..5a8b3e2dcfdb5 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5385,6 +5385,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Convert Packed Double Precision Floating-Point Values
+ // to Packed Single PrecisionFloating-Point Values
case Intrinsic::x86_sse2_cvtpd2ps:
case Intrinsic::x86_sse2_cvtps2dq:
case Intrinsic::x86_sse2_cvtpd2dq:
@@ -5399,6 +5401,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
case Intrinsic::x86_avx512_psll_w_512:
case Intrinsic::x86_avx512_psll_d_512:
case Intrinsic::x86_avx512_psll_q_512:
@@ -5966,7 +5969,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
// Convert Packed Single Precision Floating-Point Values
- // to Packed SignedDoubleword Integer Values
+ // to Packed Signed Doubleword Integer Values
//
// <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
// (<16 x float>, <16 x i32>, i16, i32)
>From 1b4441bf3b7e0d56f74969033deb31c8e26a6a5d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Wed, 20 Aug 2025 16:17:39 +0000
Subject: [PATCH 7/7] Move convert handlers closer together, update comments
---
.../Instrumentation/MemorySanitizer.cpp | 52 ++++++++++---------
1 file changed, 28 insertions(+), 24 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5a8b3e2dcfdb5..d862c3a46e6cb 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -5345,6 +5345,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_sse_ldmxcsr:
handleLdmxcsr(I);
break;
+
+ // Convert Scalar Double Precision Floating-Point Value
+ // to Unsigned Doubleword Integer
+ // etc.
case Intrinsic::x86_avx512_vcvtsd2usi64:
case Intrinsic::x86_avx512_vcvtsd2usi32:
case Intrinsic::x86_avx512_vcvtss2usi64:
@@ -5386,7 +5390,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
// Convert Packed Double Precision Floating-Point Values
- // to Packed Single PrecisionFloating-Point Values
+ // to Packed Single Precision Floating-Point Values
case Intrinsic::x86_sse2_cvtpd2ps:
case Intrinsic::x86_sse2_cvtps2dq:
case Intrinsic::x86_sse2_cvtpd2dq:
@@ -5401,6 +5405,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
break;
}
+ // Convert Packed Single Precision Floating-Point Values
+ // to Packed Signed Doubleword Integer Values
+ //
+ // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float>, <16 x i32>, i16, i32)
+ case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
+ break;
+ }
+
+ // Convert Single-Precision FP Value to 16-bit FP Value
+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+ // (<16 x float>, i32, <16 x i16>, i16)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+ // (<4 x float>, i32, <8 x i16>, i8)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
+ // (<8 x float>, i32, <8 x i16>, i8)
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
+ break;
+
// Shift Packed Data (Left Logical, Right Arithmetic, Right Logical)
case Intrinsic::x86_avx512_psll_w_512:
case Intrinsic::x86_avx512_psll_d_512:
@@ -5968,29 +5995,6 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*trailingVerbatimArgs=*/1);
break;
- // Convert Packed Single Precision Floating-Point Values
- // to Packed Signed Doubleword Integer Values
- //
- // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
- // (<16 x float>, <16 x i32>, i16, i32)
- case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
- handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
- break;
- }
-
- // Convert Single-Precision FP Value to 16-bit FP Value
- // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
- // (<16 x float>, i32, <16 x i16>, i16)
- // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
- // (<4 x float>, i32, <8 x i16>, i8)
- // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
- // (<8 x float>, i32, <8 x i16>, i8)
- case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
- case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
- case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
- handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
- break;
-
// AVX512 PMOV: Packed MOV, with truncation
// Precisely handled by applying the same intrinsic to the shadow
case Intrinsic::x86_avx512_mask_pmov_dw_512:
More information about the llvm-commits
mailing list