[llvm] [msan] Handle AVX512 VCVTPS2PH (PR #154460)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 19 20:04:26 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-compiler-rt-sanitizer
Author: Thurston Dang (thurstond)
Changes:
This extends maybeExtendVectorShadowWithZeros() (introduced in 556c8467d15a131552e3c84478d768bafd95d4e6, https://github.com/llvm/llvm-project/pull/147377) to handle the AVX512 VCVTPS2PH intrinsics.
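The per-element rule these handlers use (stated in the patch comments below) is all-or-nothing: if any bit of an input element's shadow is poisoned, the corresponding output element's shadow is fully poisoned, and the mask selects between that and the writethrough's shadow. A rough scalar model of that rule, with illustrative names rather than code from the patch:

```cpp
#include <cstdint>

// Rough scalar model of the per-element rule described in the patch:
//   dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
// where all_or_nothing(x) is fully uninitialized if any bit of x is
// uninitialized, and fully initialized otherwise.
uint16_t elementShadow(uint32_t aShadow, uint16_t writethruShadow, bool maskBit) {
  uint16_t allOrNothing = (aShadow != 0) ? 0xFFFFu : 0u;
  return maskBit ? allOrNothing : writethruShadow;
}
```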
---
Patch is 22.33 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/154460.diff
3 Files Affected:
- (modified) llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp (+80-23)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll (+24-28)
- (modified) llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll (+47-46)
``````````diff
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 948e2c6e06843..13262c2c8b36f 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3429,8 +3429,10 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return ShadowType;
}
- /// Doubles the length of a vector shadow (filled with zeros) if necessary to
- /// match the length of the shadow for the instruction.
+ /// Doubles the length of a vector shadow (extending with zeros) if necessary
+ /// to match the length of the shadow for the instruction.
+  /// If the scalar types of the vectors differ, the scalar type of the
+  /// input vector is used.
/// This is more type-safe than CreateShadowCast().
Value *maybeExtendVectorShadowWithZeros(Value *Shadow, IntrinsicInst &I) {
IRBuilder<> IRB(&I);
@@ -3440,10 +3442,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
Value *FullShadow = getCleanShadow(&I);
assert(cast<FixedVectorType>(Shadow->getType())->getNumElements() <=
cast<FixedVectorType>(FullShadow->getType())->getNumElements());
- assert(cast<FixedVectorType>(Shadow->getType())->getScalarType() ==
- cast<FixedVectorType>(FullShadow->getType())->getScalarType());
- if (Shadow->getType() == FullShadow->getType()) {
+ if (cast<FixedVectorType>(Shadow->getType())->getNumElements() ==
+ cast<FixedVectorType>(FullShadow->getType())->getNumElements()) {
FullShadow = Shadow;
} else {
// TODO: generalize beyond 2x?
@@ -4528,55 +4529,93 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
return isFixedFPVectorTy(V->getType());
}
- // e.g., call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
- // (<16 x float> a, <16 x i32> writethru, i16 mask,
- // i32 rounding)
+ // e.g., <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float> a, <16 x i32> writethru, i16 mask,
+ // i32 rounding)
+ //
+ // Inconveniently, some similar intrinsics have a different operand order:
+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+ // (<16 x float> a, i32 rounding, <16 x i16> writethru,
+ // i16 mask)
+ //
+ // If the return type has more elements than A, the excess elements are
+ // zeroed (and the corresponding shadow is initialized).
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+ // (<4 x float> a, i32 rounding, <8 x i16> writethru,
+ // i8 mask)
//
// dst[i] = mask[i] ? convert(a[i]) : writethru[i]
// dst_shadow[i] = mask[i] ? all_or_nothing(a_shadow[i]) : writethru_shadow[i]
// where all_or_nothing(x) is fully uninitialized if x has any
// uninitialized bits
- void handleAVX512VectorConvertFPToInt(IntrinsicInst &I) {
+ void handleAVX512VectorConvertFPToInt(IntrinsicInst &I, bool LastMask) {
IRBuilder<> IRB(&I);
assert(I.arg_size() == 4);
Value *A = I.getOperand(0);
- Value *WriteThrough = I.getOperand(1);
- Value *Mask = I.getOperand(2);
- Value *RoundingMode = I.getOperand(3);
+ Value *WriteThrough;
+ Value *Mask;
+ Value *RoundingMode;
+ if (LastMask) {
+ WriteThrough = I.getOperand(2);
+ Mask = I.getOperand(3);
+ RoundingMode = I.getOperand(1);
+ } else {
+ WriteThrough = I.getOperand(1);
+ Mask = I.getOperand(2);
+ RoundingMode = I.getOperand(3);
+ }
assert(isFixedFPVector(A));
assert(isFixedIntVector(WriteThrough));
unsigned ANumElements =
cast<FixedVectorType>(A->getType())->getNumElements();
- assert(ANumElements ==
- cast<FixedVectorType>(WriteThrough->getType())->getNumElements());
+ unsigned WriteThruNumElements =
+ cast<FixedVectorType>(WriteThrough->getType())->getNumElements();
+ assert(ANumElements == WriteThruNumElements ||
+ ANumElements * 2 == WriteThruNumElements);
assert(Mask->getType()->isIntegerTy());
- assert(Mask->getType()->getScalarSizeInBits() == ANumElements);
+ unsigned MaskNumElements = Mask->getType()->getScalarSizeInBits();
+ assert(ANumElements == MaskNumElements ||
+ ANumElements * 2 == MaskNumElements);
+
+ assert(WriteThruNumElements == MaskNumElements);
+
insertCheckShadowOf(Mask, &I);
assert(RoundingMode->getType()->isIntegerTy());
- // Only four bits of the rounding mode are used, though it's very
+ // Only some bits of the rounding mode are used, though it's very
// unusual to have uninitialized bits there (more commonly, it's a
// constant).
insertCheckShadowOf(RoundingMode, &I);
assert(I.getType() == WriteThrough->getType());
+ Value *AShadow = getShadow(A);
+ AShadow = maybeExtendVectorShadowWithZeros(AShadow, I);
+
+ if (ANumElements * 2 == MaskNumElements) {
+ // Ensure that the irrelevant bits of the mask are zero, hence selecting
+ // from the zeroed shadow instead of the writethrough's shadow.
+ Mask = IRB.CreateTrunc(Mask, IRB.getIntNTy(ANumElements));
+ Mask = IRB.CreateZExt(Mask, IRB.getIntNTy(MaskNumElements));
+ }
+
// Convert i16 mask to <16 x i1>
Mask = IRB.CreateBitCast(
- Mask, FixedVectorType::get(IRB.getInt1Ty(), ANumElements));
+ Mask, FixedVectorType::get(IRB.getInt1Ty(), MaskNumElements));
- Value *AShadow = getShadow(A);
- /// For scalars:
- /// Since they are converting from floating-point, the output is:
+ /// For floating-point to integer conversion, the output is:
/// - fully uninitialized if *any* bit of the input is uninitialized
    /// - fully initialized if all bits of the input are initialized
/// We apply the same principle on a per-element basis for vectors.
- AShadow = IRB.CreateSExt(IRB.CreateICmpNE(AShadow, getCleanShadow(A)),
- getShadowTy(A));
+ ///
+ /// We use the scalar width of the return type instead of A's.
+ AShadow = IRB.CreateSExt(
+ IRB.CreateICmpNE(AShadow, getCleanShadow(AShadow->getType())),
+ getShadowTy(&I));
Value *WriteThroughShadow = getShadow(WriteThrough);
Value *Shadow = IRB.CreateSelect(Mask, AShadow, WriteThroughShadow);
@@ -5920,11 +5959,29 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/*trailingVerbatimArgs=*/1);
break;
+ // Convert Packed Single Precision Floating-Point Values
+  //     to Packed Signed Doubleword Integer Values
+ //
+ // <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512
+ // (<16 x float>, <16 x i32>, i16, i32)
case Intrinsic::x86_avx512_mask_cvtps2dq_512: {
- handleAVX512VectorConvertFPToInt(I);
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/false);
break;
}
+ // Convert Single-Precision FP Value to 16-bit FP Value
+ // <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512
+ // (<16 x float>, i32, <16 x i16>, i16)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128
+ // (<4 x float>, i32, <8 x i16>, i8)
+ // <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256
+ // (<8 x float>, i32, <8 x i16>, i8)
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_512:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_256:
+ case Intrinsic::x86_avx512_mask_vcvtps2ph_128:
+ handleAVX512VectorConvertFPToInt(I, /*LastMask=*/true);
+ break;
+
// AVX512 PMOV: Packed MOV, with truncation
// Precisely handled by applying the same intrinsic to the shadow
case Intrinsic::x86_avx512_mask_pmov_dw_512:
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 3c1af6781f0ed..eba0beb5bf6ac 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1903,50 +1903,46 @@ define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> splat (i1 true), <16 x i16> [[TMP7]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i16>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP10]], <16 x i16> [[TMP12]], <16 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 12:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
; CHECK: 13:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK]])
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast i16 [[MASK]] to <16 x i1>
+; CHECK-NEXT: [[TMP26:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP27:%.*]] = sext <16 x i1> [[TMP26]] to <16 x i16>
+; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP25]], <16 x i16> [[TMP27]], <16 x i16> [[TMP3]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP6]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 18:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 14:
+; CHECK: 19:
; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
-; CHECK: 15:
+; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP24:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 20:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 16:
+; CHECK: 21:
; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
+; CHECK-NEXT: store <16 x i16> [[TMP8]], ptr [[TMP19]], align 32
; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i16> [[TMP13]], [[TMP20]]
; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i16> [[_MSPROP]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[RES]]
;
%res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index db0c2e7ae9ed6..e22301174a0ca 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -7893,40 +7893,44 @@ define <8 x i16> @test_x86_vcvtps2ph_128(<4 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK: [[BB5]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> bitcast (<1 x i8> splat (i8 15) to <8 x i1>), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT: [[TMP10:%.*]] = zext i4 [[TMP9]] to i8
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP10]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <8 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <8 x i1> [[TMP12]] to <8 x i16>
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP11]], <8 x i16> [[TMP13]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
-; CHECK: [[BB8]]:
+; CHECK-NEXT: br i1 [[_MSCMP2]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
+; CHECK: [[BB15]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB9]]:
+; CHECK: [[BB16]]:
; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 10, <8 x i16> zeroinitializer, i8 [[MASK]])
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i16> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP18:%.*]] = trunc i8 [[MASK]] to i4
+; CHECK-NEXT: [[TMP19:%.*]] = zext i4 [[TMP18]] to i8
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast i8 [[TMP19]] to <8 x i1>
+; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <8 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <8 x i1> [[TMP21]] to <8 x i16>
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP20]], <8 x i16> [[TMP22]], <8 x i16> [[TMP3]]
; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: br i1 [[_MSCMP6]], label %[[BB24:.*]], label %[[BB25:.*]], !prof [[PROF1]]
+; CHECK: [[BB24]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB25]]:
; CHECK-NEXT: [[RES3:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> [[A0]], i32 11, <8 x i16> [[SRC]], i8 [[MASK]])
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i16> [[TMP7]], [[TMP14]]
; CHECK-NEXT: [[RES0:%.*]] = add <8 x i16> [[RES1]], [[RES2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i16> [[TMP23]], [[_MSPROP]]
; CHECK-NEXT: [[RES:%.*]] = add <8 x i16> [[RES3]], [[RES0]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[RES]]
;
%res1 = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.128(<4 x float> %a0, i32 2, <8 x i16> zeroinitializer, i8 -1)
@@ -7947,40 +7951,37 @@ define <8 x i16> @test_x86_vcvtps2ph_256(<8 x float> %a0, i8 %mask, <8 x i16> %s
; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB5:.*]], label %[[BB6:.*]], !prof [[PROF1]]
-; CHECK: [[BB5]]:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
-; CHECK-NEXT: unreachable
-; CHECK: [[BB6]]:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> splat (i1 true), <8 x i16> [[TMP6]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.vcvtps2ph.256(<8 x float> [[A0]], i32 2, <8 x i16> zeroinitializer, i8 -1)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <8 x i1> [[TMP10]] to <8 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP9]], <8 x i16> [[TMP11]], <8 x i16> zeroinitializer
; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label ...
[truncated]
``````````
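For the narrower variants such as vcvtps2ph.128, where a <4 x float> input produces an <8 x i16> result, the patch widens the input shadow with zeros (the shufflevector visible in the updated tests) and truncates-then-zero-extends the mask so that only the bits corresponding to real input elements remain set. That trunc/zext pair is equivalent to the following bit-masking; a minimal sketch with illustrative names, not code from the patch:

```cpp
#include <cstdint>

// Illustrative equivalent of the trunc-to-i<N>/zext-back-to-i<2N> pair in
// the patch: keep only the mask bits that correspond to actual input
// elements (e.g. 4 for vcvtps2ph.128) and clear the rest before building
// the per-element shadow select.
uint8_t normalizeConversionMask(uint8_t mask, unsigned inputElements) {
  uint8_t relevantBits = static_cast<uint8_t>((1u << inputElements) - 1u);
  return mask & relevantBits;
}
```

For example, normalizeConversionMask(0xFF, 4) yields 0x0F, matching the `trunc i8 ... to i4` / `zext i4 ... to i8` sequence in the test output above.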
https://github.com/llvm/llvm-project/pull/154460
More information about the llvm-commits mailing list