[llvm] 2fca446 - [msan] Handle AVX512 pack with saturation intrinsics (#157984)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 11 10:41:43 PDT 2025
Author: Thurston Dang
Date: 2025-09-11T10:41:40-07:00
New Revision: 2fca446779333f540b1a582f4a8cbc14744e8e18
URL: https://github.com/llvm/llvm-project/commit/2fca446779333f540b1a582f4a8cbc14744e8e18
DIFF: https://github.com/llvm/llvm-project/commit/2fca446779333f540b1a582f4a8cbc14744e8e18.diff
LOG: [msan] Handle AVX512 pack with saturation intrinsics (#157984)
Approximately handle avx512_{packssdw/packsswb/packusdw/packuswb} with
the existing handleVectorPackIntrinsic(), instead of relying on the
default (strict) handler.
Added:
Modified:
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 9899a2aae2b15..3ea790ad1839a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -3684,6 +3684,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_mmx_packssdw:
return Intrinsic::x86_mmx_packssdw;
+
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packusdw_512:
+ return Intrinsic::x86_avx512_packssdw_512;
+
+ case Intrinsic::x86_avx512_packsswb_512:
+ case Intrinsic::x86_avx512_packuswb_512:
+ return Intrinsic::x86_avx512_packsswb_512;
+
default:
llvm_unreachable("unexpected intrinsic id");
}
@@ -3696,6 +3705,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// Shadow is propagated with the signed variant of the same intrinsic applied
// to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
// MMXEltSizeInBits is used only for x86mmx arguments.
+ //
+ // TODO: consider using GetMinMaxUnsigned() to handle saturation precisely
void handleVectorPackIntrinsic(IntrinsicInst &I,
unsigned MMXEltSizeInBits = 0) {
assert(I.arg_size() == 2);
@@ -5554,6 +5565,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
handleVectorShiftIntrinsic(I, /* Variable */ true);
break;
+ // Pack with Signed/Unsigned Saturation
case Intrinsic::x86_sse2_packsswb_128:
case Intrinsic::x86_sse2_packssdw_128:
case Intrinsic::x86_sse2_packuswb_128:
@@ -5562,6 +5574,15 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx2_packssdw:
case Intrinsic::x86_avx2_packuswb:
case Intrinsic::x86_avx2_packusdw:
+ // e.g., <64 x i8> @llvm.x86.avx512.packsswb.512
+ // (<32 x i16> %a, <32 x i16> %b)
+ // <32 x i16> @llvm.x86.avx512.packssdw.512
+ // (<16 x i32> %a, <16 x i32> %b)
+ // Note: AVX512 masked variants are auto-upgraded by LLVM.
+ case Intrinsic::x86_avx512_packsswb_512:
+ case Intrinsic::x86_avx512_packssdw_512:
+ case Intrinsic::x86_avx512_packuswb_512:
+ case Intrinsic::x86_avx512_packusdw_512:
handleVectorPackIntrinsic(I);
break;
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
index 51dad35a1edbc..7bd35182d5c90 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
@@ -5,8 +5,6 @@
;
; Strictly handled:
; - llvm.x86.avx512.dbpsadbw.512
-; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512
-; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512
;
; Heuristically handled:
; - llvm.sadd.sat.v32i16, llvm.sadd.sat.v64i8
@@ -2039,19 +2037,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) no
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <32 x i16> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
+; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i16> [[TMP8]]
;
%res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
ret <32 x i16> %res
@@ -2064,25 +2057,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]]
; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP16]]
;
@@ -2096,25 +2084,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]]
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer
; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP15]]
;
@@ -2138,18 +2121,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) nounw
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%b = load <16 x i32>, ptr %ptr_b
@@ -2175,22 +2153,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -2219,22 +2192,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -2266,18 +2234,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) noun
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%q = load i32, ptr %ptr_b
@@ -2309,22 +2272,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -2359,22 +2317,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -2395,19 +2348,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) nou
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <64 x i8> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
+; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <64 x i8> [[TMP8]]
;
%res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
ret <64 x i8> %res
@@ -2420,25 +2368,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]]
; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP16]]
;
@@ -2452,25 +2395,20 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]]
-; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer
; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP15]]
;
@@ -2494,18 +2432,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounwi
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP12]]
;
%b = load <32 x i16>, ptr %ptr_b
@@ -2531,22 +2464,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]]
@@ -2575,22 +2503,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer
@@ -2610,18 +2533,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) n
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP7]]
;
%res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
@@ -2635,22 +2553,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]]
@@ -2667,22 +2580,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]])
; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
@@ -2709,18 +2617,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) noun
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%b = load <16 x i32>, ptr %ptr_b
@@ -2746,22 +2649,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -2790,22 +2688,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -2837,18 +2730,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) nou
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%q = load i32, ptr %ptr_b
@@ -2880,22 +2768,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -2930,22 +2813,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -2966,18 +2844,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) no
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP7]]
;
%res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
@@ -2991,22 +2864,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]]
@@ -3023,22 +2891,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]])
; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
@@ -3065,18 +2928,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) nounw
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP12]]
;
%b = load <32 x i16>, ptr %ptr_b
@@ -3102,22 +2960,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]]
@@ -3146,22 +2999,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
index c6c7e002213bd..8bf6d5acc21ba 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
@@ -7,8 +7,6 @@
; - llvm.x86.avx512.dbpsadbw.512
; - llvm.x86.avx512.ktestc.d, llvm.x86.avx512.ktestc.q, llvm.x86.avx512.ktestz.d, llvm.x86.avx512.ktestz.q
; - llvm.x86.avx512.mask.pmov.wb.mem.512
-; - llvm.x86.avx512.packssdw.512, llvm.x86.avx512.packsswb.512
-; - llvm.x86.avx512.packusdw.512, llvm.x86.avx512.packuswb.512
; - llvm.x86.avx512.psad.bw.512
;
; Heuristically handled:
@@ -295,19 +293,14 @@ define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #0
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <32 x i16> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
+; CHECK-NEXT: store <32 x i16> [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <32 x i16> [[TMP8]]
;
%1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
ret <32 x i16> %1
@@ -320,25 +313,20 @@ define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP17:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP17]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP17]], <32 x i16> [[PASSTHRU]]
; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP16]]
;
@@ -354,25 +342,20 @@ define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A]], <16 x i32> [[B]])
+; CHECK-NEXT: [[TMP16:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A1:%.*]], <16 x i32> [[B1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]]
-; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP16]], <32 x i16> zeroinitializer
; CHECK-NEXT: store <32 x i16> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP15]]
;
@@ -398,18 +381,13 @@ define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%b = load <16 x i32>, ptr %ptr_b
@@ -435,22 +413,17 @@ define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -481,22 +454,17 @@ define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i32
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -530,18 +498,13 @@ define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0 {
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%q = load i32, ptr %ptr_b
@@ -573,22 +536,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -625,22 +583,17 @@ define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i3
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -663,19 +616,14 @@ define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <64 x i8> [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
+; CHECK-NEXT: store <64 x i8> [[TMP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <64 x i8> [[TMP8]]
;
%1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
ret <64 x i8> %1
@@ -688,25 +636,20 @@ define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <6
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP17:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP17]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP9]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP17]], <64 x i8> [[PASSTHRU]]
; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP16]]
;
@@ -722,25 +665,20 @@ define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[A:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[B:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
+; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A]], <32 x i16> [[B]])
+; CHECK-NEXT: [[TMP16:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A1:%.*]], <32 x i16> [[B1:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP8]]
; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]]
-; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP16]], <64 x i8> zeroinitializer
; CHECK-NEXT: store <64 x i8> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP15]]
;
@@ -766,18 +704,13 @@ define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP12]]
;
%b = load <32 x i16>, ptr %ptr_b
@@ -803,22 +736,17 @@ define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64 x
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]]
@@ -849,22 +777,17 @@ define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer
@@ -886,18 +809,13 @@ define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) #
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP8]], <16 x i32> [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP7]]
;
%1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
@@ -911,22 +829,17 @@ define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <16 x i1> [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP17]], <16 x i32> [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP6]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = xor <32 x i16> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = or <32 x i16> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP15]], <32 x i16> [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP11]], <32 x i16> [[TMP9]], <32 x i16> [[PASSTHRU]]
@@ -945,22 +858,17 @@ define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <16 x i1> [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP16]], <16 x i32> [[TMP7]])
; CHECK-NEXT: [[TMP8:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP5]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = xor <32 x i16> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <32 x i16> [[TMP12]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = or <32 x i16> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP9]], <32 x i16> [[TMP14]], <32 x i16> [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP10]], <32 x i16> [[TMP8]], <32 x i16> zeroinitializer
@@ -989,18 +897,13 @@ define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, ptr %ptr_b) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%b = load <16 x i32>, ptr %ptr_b
@@ -1026,22 +929,17 @@ define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, ptr %ptr_b, <32
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -1072,22 +970,17 @@ define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, ptr %ptr_b, i3
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -1121,18 +1014,13 @@ define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, ptr %ptr_b) #0
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP13]], <16 x i32> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
-; CHECK-NEXT: store <32 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <32 x i16> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP12]]
;
%q = load i32, ptr %ptr_b
@@ -1164,22 +1052,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, ptr %ptr_b, <3
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <16 x i1> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <16 x i1> [[TMP12]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP22]], <16 x i32> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> zeroinitializer, <32 x i16> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP11]], <32 x i16> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <32 x i16> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <32 x i16> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP20]], <32 x i16> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <32 x i1> [[TMP16]], <32 x i16> [[TMP14]], <32 x i16> [[PASSTHRU]]
@@ -1216,22 +1099,17 @@ define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, ptr %ptr_b, i
; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <16 x i32> poison, i32 [[Q]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <16 x i32> [[_MSPROP]], <16 x i32> splat (i32 -1), <16 x i32> zeroinitializer
; CHECK-NEXT: [[B:%.*]] = shufflevector <16 x i32> [[VECINIT_I]], <16 x i32> poison, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <16 x i1> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <16 x i32> [[_MSPROP1]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <16 x i1> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[TMP21]], <16 x i32> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> [[A:%.*]], <16 x i32> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i32 [[TMP3]] to <32 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> zeroinitializer, <32 x i16> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP10]], <32 x i16> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <32 x i16> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <32 x i16> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <32 x i16> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <32 x i1> [[TMP14]], <32 x i16> [[TMP19]], <32 x i16> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <32 x i1> [[TMP15]], <32 x i16> [[TMP13]], <32 x i16> zeroinitializer
@@ -1254,18 +1132,13 @@ define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) #0
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP3]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP4:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP8]], <32 x i16> [[TMP6]])
; CHECK-NEXT: [[TMP7:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP7]]
;
%1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
@@ -1279,22 +1152,17 @@ define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <64 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = sext <32 x i1> [[TMP5]] to <32 x i16>
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = sext <32 x i1> [[TMP7]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP17]], <32 x i16> [[TMP8]])
; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP6]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = xor <64 x i8> [[TMP9]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], [[TMP6]]
; CHECK-NEXT: [[TMP15:%.*]] = or <64 x i8> [[TMP14]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP15]], <64 x i8> [[TMP12]]
; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP11]], <64 x i8> [[TMP9]], <64 x i8> [[PASSTHRU]]
@@ -1313,22 +1181,17 @@ define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b,
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <32 x i16> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <32 x i16> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = sext <32 x i1> [[TMP4]] to <32 x i16>
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = sext <32 x i1> [[TMP6]] to <32 x i16>
+; CHECK-NEXT: [[TMP5:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP16]], <32 x i16> [[TMP7]])
; CHECK-NEXT: [[TMP8:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B:%.*]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP5]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = xor <64 x i8> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <64 x i8> [[TMP12]], [[TMP5]]
; CHECK-NEXT: [[TMP14:%.*]] = or <64 x i8> [[TMP13]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP9]], <64 x i8> [[TMP14]], <64 x i8> [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = select <64 x i1> [[TMP10]], <64 x i8> [[TMP8]], <64 x i8> zeroinitializer
@@ -1357,18 +1220,13 @@ define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, ptr %ptr_b) #0 {
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP7]], align 64
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP8]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP9:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP13]], <32 x i16> [[TMP11]])
; CHECK-NEXT: [[TMP12:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
-; CHECK-NEXT: store <64 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <64 x i8> [[TMP9]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[TMP12]]
;
%b = load <32 x i16>, ptr %ptr_b
@@ -1394,22 +1252,17 @@ define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, ptr %ptr_b, <64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK-NEXT: [[TMP10:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = sext <32 x i1> [[TMP10]] to <32 x i16>
+; CHECK-NEXT: [[TMP12:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = sext <32 x i1> [[TMP12]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP22]], <32 x i16> [[TMP13]])
; CHECK-NEXT: [[TMP14:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> zeroinitializer, <64 x i8> [[TMP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP11]], <64 x i8> [[TMP4]]
; CHECK-NEXT: [[TMP18:%.*]] = xor <64 x i8> [[TMP14]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], [[TMP11]]
; CHECK-NEXT: [[TMP20:%.*]] = or <64 x i8> [[TMP19]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP20]], <64 x i8> [[TMP17]]
; CHECK-NEXT: [[TMP21:%.*]] = select <64 x i1> [[TMP16]], <64 x i8> [[TMP14]], <64 x i8> [[PASSTHRU]]
@@ -1440,22 +1293,17 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, ptr %ptr_b, i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <32 x i16>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <32 x i16> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <32 x i16> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
+; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <32 x i16> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = sext <32 x i1> [[TMP9]] to <32 x i16>
+; CHECK-NEXT: [[TMP11:%.*]] = icmp ne <32 x i16> [[_MSLD]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = sext <32 x i1> [[TMP11]] to <32 x i16>
+; CHECK-NEXT: [[TMP10:%.*]] = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> [[TMP21]], <32 x i16> [[TMP12]])
; CHECK-NEXT: [[TMP13:%.*]] = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> [[A:%.*]], <32 x i16> [[B]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i64 [[TMP3]] to <64 x i1>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> zeroinitializer, <64 x i8> zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP10]], <64 x i8> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = xor <64 x i8> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP18:%.*]] = or <64 x i8> [[TMP17]], [[TMP10]]
; CHECK-NEXT: [[TMP19:%.*]] = or <64 x i8> [[TMP18]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <64 x i1> [[TMP14]], <64 x i8> [[TMP19]], <64 x i8> [[TMP16]]
; CHECK-NEXT: [[TMP20:%.*]] = select <64 x i1> [[TMP15]], <64 x i8> [[TMP13]], <64 x i8> zeroinitializer
More information about the llvm-commits
mailing list