[llvm] [msan] Handle x86_avx512_(min|max)_p[sd]_512 intrinsics (PR #124421)
Thurston Dang via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 26 17:05:34 PST 2025
https://github.com/thurstond updated https://github.com/llvm/llvm-project/pull/124421
>From eed794037e68b24a643e62e1a7f58bffdb546dff Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 25 Jan 2025 04:52:50 +0000
Subject: [PATCH 1/5] [msan] Handle x86_avx512_(min|max)_p[sd]_512 intrinsics
The AVX/SSE variants are already handled heuristically
(maybeHandleSimpleNomemIntrinsic via handleUnknownIntrinsic), but the AVX512
variants take an additional parameter (the rounding mode), which causes the
heuristic match to fail. This patch generalizes
maybeHandleSimpleNomemIntrinsic to accept a number of trailing flags of any
type (which MSan ignores when matching) and calls it explicitly to handle the
AVX512 min/max ps/pd intrinsics.
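
For illustration (not part of the patch), the shape difference that trips up
the heuristic, sketched in LLVM IR; the rounding value 4
(_MM_FROUND_CUR_DIRECTION) is just an example:

  ; AVX variant: every operand has the return type, so the heuristic matches.
  %r0 = call <8 x float> @llvm.x86.avx.max.ps.256(<8 x float> %a, <8 x float> %b)
  ; AVX512 variant: the trailing i32 rounding-mode operand differs from the
  ; return type, so the same-type check previously rejected the intrinsic.
  %r1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a, <16 x float> %b, i32 4)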
---
.../Instrumentation/MemorySanitizer.cpp | 29 +++++++++++++++----
1 file changed, 23 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index b6293af4ab477a..6d7803db10ed01 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2989,17 +2989,22 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// Handle (SIMD arithmetic)-like intrinsics.
///
- /// Instrument intrinsics with any number of arguments of the same type,
- /// equal to the return type. The type should be simple (no aggregates or
- /// pointers; vectors are fine).
+ /// Instrument intrinsics with any number of arguments of the same type [*],
+ /// equal to the return type, plus a specified number of trailing flags of
+ /// any type.
+ ///
+  /// [*] The type should be simple (no aggregates or pointers; vectors are
+  /// fine).
+ ///
/// Caller guarantees that this intrinsic does not access memory.
- bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
+  bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
+                                       unsigned int trailingFlags) {
Type *RetTy = I.getType();
if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
return false;
unsigned NumArgOperands = I.arg_size();
- for (unsigned i = 0; i < NumArgOperands; ++i) {
+ assert(NumArgOperands >= trailingFlags);
+ for (unsigned i = 0; i < NumArgOperands - trailingFlags; ++i) {
Type *Ty = I.getArgOperand(i)->getType();
if (Ty != RetTy)
return false;
@@ -3043,7 +3048,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (I.doesNotAccessMemory())
- if (maybeHandleSimpleNomemIntrinsic(I))
+      if (maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/0))
return true;
// FIXME: detect and handle SSE maskstore/maskload?
@@ -4615,5 +4620,19 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
   case Intrinsic::x86_avx2_maskload_d_256:
   case Intrinsic::x86_avx2_maskload_q_256: {
     handleAVXMaskedLoad(I);
     break;
   }
+
+  // Packed
+  case Intrinsic::x86_avx512_min_ps_512:
+  case Intrinsic::x86_avx512_min_pd_512:
+  case Intrinsic::x86_avx512_max_ps_512:
+  case Intrinsic::x86_avx512_max_pd_512: {
+    // These AVX512 variants contain the rounding mode as a trailing flag.
+    // Earlier variants do not have a trailing flag and are already handled
+    // by maybeHandleSimpleNomemIntrinsic(I, 0) via handleUnknownIntrinsic.
+    bool success = maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
+    (void)success;
+    assert(success);
+    break;
+  }
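
For the intrinsics handled above, maybeHandleSimpleNomemIntrinsic falls
through to MSan's usual strict shadow propagation: the result shadow is the
OR of the operand shadows. A minimal sketch of what this produces for
llvm.x86.avx512.max.ps.512 (the %sa/%sb shadow names are illustrative; the
rounding operand is typically a constant, so its shadow is zero and drops out):

  ; %sa, %sb = shadows of %a and %b, loaded from __msan_param_tls
  %s = or <16 x i32> %sa, %sb   ; shadow of the min/max result
  store <16 x i32> %s, ptr @__msan_retval_tls, align 8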
>From d4822148cb722dedf140a21b1f99423db22cbeda Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 25 Jan 2025 17:31:46 +0000
Subject: [PATCH 2/5] Test (not the correct base)
---
.../MemorySanitizer/avx512-intrinsics.ll | 13602 ++++++++++++++++
1 file changed, 13602 insertions(+)
create mode 100644 llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
new file mode 100644
index 00000000000000..c24f9cf377cf62
--- /dev/null
+++ b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
@@ -0,0 +1,13602 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
+;
+; Forked from llvm/test/CodeGen/X86/avx512-intrinsics.ll
+
+define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_compress_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10:[0-9]+]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP11]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_compress_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP9]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_compress_pd_512(<8 x double> %data) #0 {
+; CHECK-LABEL: @test_compress_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true))
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x double> %1
+}
+
+define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_compress_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP11]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_compress_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP9]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_compress_ps_512(<16 x float> %data) #0 {
+; CHECK-LABEL: @test_compress_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true))
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x float> %1
+}
+
+define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_compress_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_compress_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP9]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_compress_q_512(<8 x i64> %data) #0 {
+; CHECK-LABEL: @test_compress_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true))
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP2]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i64> %1
+}
+
+define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_compress_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
+ ret <16 x i32> %2
+}
+
+define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_compress_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP9]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
+ ret <16 x i32> %2
+}
+
+define <16 x i32> @test_compress_d_512(<16 x i32> %data) #0 {
+; CHECK-LABEL: @test_compress_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true))
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP2]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i32> %1
+}
+
+define <8 x double> @test_expand_pd_512(<8 x double> %data) #0 {
+; CHECK-LABEL: @test_expand_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true))
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_expand_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP11]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
+ ret <8 x double> %2
+}
+
+define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_expand_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP9]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
+ ret <8 x double> %2
+}
+
+define <16 x float> @test_expand_ps_512(<16 x float> %data) #0 {
+; CHECK-LABEL: @test_expand_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true))
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_expand_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP11]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
+ ret <16 x float> %2
+}
+
+define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_expand_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP9]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
+ ret <16 x float> %2
+}
+
+define <8 x i64> @test_expand_q_512(<8 x i64> %data) #0 {
+; CHECK-LABEL: @test_expand_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true))
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP2]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <8 x i64> %1
+}
+
+define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_expand_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
+ ret <8 x i64> %2
+}
+
+define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_expand_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP9]]
+;
+ %1 = bitcast i8 %mask to <8 x i1>
+ %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
+ ret <8 x i64> %2
+}
+
+define <16 x i32> @test_expand_d_512(<16 x i32> %data) #0 {
+; CHECK-LABEL: @test_expand_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true))
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP2]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+ ret <16 x i32> %1
+}
+
+define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_expand_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
+ ret <16 x i32> %2
+}
+
+define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_expand_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP9]]
+;
+ %1 = bitcast i16 %mask to <16 x i1>
+ %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
+ ret <16 x i32> %2
+}
+
+define <16 x float> @test_rcp_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_rcp_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
+
+define <8 x double> @test_rcp_pd_512(<8 x double> %a0) #0 {
+; CHECK-LABEL: @test_rcp_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
+
+declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
+
+define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) #0 {
+; CHECK-LABEL: @test_rndscale_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 11, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4)
+ ret <2 x double>%res
+}
+
+define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) #0 {
+; CHECK-LABEL: @test_rndscale_sd_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
+ ret <2 x double>%res
+}
+
+define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, ptr %bptr, <2 x double> %c, i8 %mask) #0 {
+; CHECK-LABEL: @test_rndscale_sd_mask_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[B:%.*]] = load <2 x double>, ptr [[BPTR:%.*]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[BPTR]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 16
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %b = load <2 x double>, ptr %bptr
+ %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
+ ret <2 x double>%res
+}
+
+define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) #0 {
+; CHECK-LABEL: @test_rndscale_sd_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4)
+ ret <2 x double>%res
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
+
+define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) #0 {
+; CHECK-LABEL: @test_rndscale_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 11, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
+ ret <4 x float>%res
+}
+
+define <4 x float> @test_rndscale_ss_load(<4 x float> %a, ptr %bptr) #0 {
+; CHECK-LABEL: @test_rndscale_ss_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[B:%.*]] = load <4 x float>, ptr [[BPTR:%.*]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[BPTR]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B]], <4 x float> undef, i8 -1, i32 11, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %b = load <4 x float>, ptr %bptr
+ %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
+ ret <4 x float>%res
+}
+
+define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) #0 {
+; CHECK-LABEL: @test_rndscale_ss_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4)
+ ret <4 x float>%res
+}
+
+define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) #0 {
+; CHECK-LABEL: @test_rndscale_ss_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4)
+ ret <4 x float>%res
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+
+define <8 x double> @test7(<8 x double> %a) #0 {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> [[A:%.*]], i32 11, <8 x double> [[A]], i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
+ ret <8 x double>%res
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+
+define <16 x float> @test8(<16 x float> %a) #0 {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> [[A:%.*]], i32 11, <16 x float> [[A]], i16 -1, i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
+ ret <16 x float>%res
+}
+
+define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_rsqrt_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
+
+define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) #0 {
+; CHECK-LABEL: @test_sqrt_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
+; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP2]]
+;
+ %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_sqrt_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP1]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP13]]
+;
+ %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_sqrt_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP11]]
+;
+ %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
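+; Note: plain @llvm.sqrt.* calls are handled by straightforward shadow
+; propagation (the operand shadow in TMP1 is stored directly as the return
+; shadow), and the masked variants feed it through the usual select pattern
+; (_MSPROP_SELECT) instead of emitting a warning branch.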
+
+define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) #0 {
+; CHECK-LABEL: @test_sqrt_round_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP5]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_sqrt_round_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> zeroinitializer, <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP9]], <8 x double> [[TMP7]], <8 x double> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_sqrt_round_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP14]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone
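+; Note: in contrast to @llvm.sqrt.v8f64, the rounding variant
+; @llvm.x86.avx512.sqrt.pd.512 is handled strictly above: the whole operand
+; shadow is compared against zero and the return shadow is clean, so an
+; uninitialized input triggers __msan_warning_noreturn instead of
+; propagating.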
+
+define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_sqrt_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
+; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP2]]
+;
+ %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_sqrt_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP13]]
+;
+ %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_sqrt_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP11]]
+;
+ %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
+
+define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_sqrt_round_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP5]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_mask_sqrt_round_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP9]], <16 x float> [[TMP7]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) #0 {
+; CHECK-LABEL: @test_maskz_sqrt_round_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP14]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone
+
+define <8 x double> @test_getexp_pd_512(<8 x double> %a0) #0 {
+; CHECK-LABEL: @test_getexp_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
+ ret <8 x double> %res
+}
+
+define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) #0 {
+; CHECK-LABEL: @test_getexp_round_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 12)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 12)
+ ret <8 x double> %res
+}
+declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
+
+define <16 x float> @test_getexp_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_getexp_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+
+define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) #0 {
+; CHECK-LABEL: @test_getexp_round_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
+ ret <16 x float> %res
+}
+declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
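+; Note: the getexp tests exercise both the default rounding operand (i32 4)
+; and non-default values; the CHECK lines show all of them falling back to
+; the strict check-and-warn instrumentation with a clean return shadow.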
+
+declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_sqrt_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
+; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 9)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
+; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 [[MASK]], i32 10)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
+; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
+; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
+; CHECK: 21:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 22:
+; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 -1, i32 11)
+; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES_2:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
+; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[RES_1]], [[RES_2]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11)
+
+ %res.1 = fadd <4 x float> %res0, %res1
+ %res.2 = fadd <4 x float> %res2, %res3
+ %res = fadd <4 x float> %res.1, %res.2
+ ret <4 x float> %res
+}
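+
+; Note: each mask.sqrt.ss call above gets its own shadow check. For %res3,
+; whose passthrough and mask operands are constants, only the shadows of
+; %a0 and %a1 are checked (_MSOR20), since constants carry a clean shadow.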
+
+declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_sqrt_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
+; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 9)
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
+; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 [[MASK]], i32 10)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
+; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
+; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
+; CHECK: 21:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 22:
+; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 -1, i32 11)
+; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES_2:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
+; CHECK-NEXT: [[RES:%.*]] = fadd <2 x double> [[RES_1]], [[RES_2]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11)
+
+ %res.1 = fadd <2 x double> %res0, %res1
+ %res.2 = fadd <2 x double> %res2, %res3
+ %res = fadd <2 x double> %res.1, %res.2
+ ret <2 x double> %res
+}
+
+define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvttsd2usi(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0]], i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8)
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvttsd2si(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0]], i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8)
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvttss2si(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0:%.*]], i32 8)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0]], i32 4)
+; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8)
+ %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4)
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvttss2si_load(ptr %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvttss2si_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
+; CHECK: 2:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 3:
+; CHECK-NEXT: [[A1:%.*]] = load <4 x float>, ptr [[A0:%.*]], align 16
+; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A1]], i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %a1 = load <4 x float>, ptr %a0
+ %res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4)
+ ret i32 %res
+}
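+
+; Note: the load-based variant additionally checks the pointer's shadow and
+; loads the vector's shadow from shadow memory, computed by XOR-ing the
+; address with 87960930222080 (0x500000000000).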
+
+define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvttss2usi(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0:%.*]], i32 8)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0]], i32 4)
+; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES2]]
+;
+ %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8)
+ %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4)
+ %res2 = add i32 %res0, %res1
+ ret i32 %res2
+}
+declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
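+; Note: the truncating conversions differ in how much shadow they check:
+; cvttsd2usi and cvttss2usi test only element 0's shadow (extractelement),
+; while cvttsd2si and cvttss2si bitcast and test the whole vector's shadow.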
+
+define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvtsd2usi32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 9)
+; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES4]]
+;
+ %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9)
+ %res3 = add i32 %res, %res1
+ %res4 = add i32 %res3, %res2
+ ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvtsd2si32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 9)
+; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES4]]
+;
+ %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9)
+ %res3 = add i32 %res, %res1
+ %res4 = add i32 %res3, %res2
+ ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvtss2usi32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 9)
+; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES4]]
+;
+ %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9)
+ %res3 = add i32 %res, %res1
+ %res4 = add i32 %res3, %res2
+ ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
+
+define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvtss2si32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0:%.*]], i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 11)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 9)
+; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES4]]
+;
+ %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
+ %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11)
+ %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9)
+ %res3 = add i32 %res, %res1
+ %res4 = add i32 %res3, %res2
+ ret i32 %res4
+}
+declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
+
+define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, ptr %dst) #0 {
+; CHECK-LABEL: @test_x86_vcvtps2ph_256(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 15:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 16:
+; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
+; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
+; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32
+; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i16> [[RES]]
+;
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 11, <16 x i16> zeroinitializer, i16 %mask)
+ %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 12, <16 x i16> %src, i16 %mask)
+ store <16 x i16> %res1, ptr %dst
+ %res = add <16 x i16> %res2, %res3
+ ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
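+; Note: in @test_x86_vcvtps2ph_256, the pointer's shadow is checked before
+; the store to %dst, and a clean shadow is written to the corresponding
+; shadow address ahead of the real <16 x i16> store.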
+
+define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) #0 {
+; CHECK-LABEL: @test_cmpps(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 2, <16 x i1> splat (i1 true), i32 8)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[RES]] to i16
+; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i16 [[TMP7]]
+;
+ %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
+ %1 = bitcast <16 x i1> %res to i16
+ ret i16 %1
+}
+declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
+
+define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) #0 {
+; CHECK-LABEL: @test_cmppd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 4, <8 x i1> splat (i1 true), i32 4)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[RES]] to i8
+; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i8 [[TMP7]]
+;
+ %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
+ %1 = bitcast <8 x i1> %res to i8
+ ret i8 %1
+}
+declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
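+; Note: the mask.cmp.* tests check both vector operands' shadows and then
+; treat the <N x i1> result as fully initialized: a zero shadow (i16 0 /
+; i8 0) is stored for the returned mask.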
+
+
+; FP min/max
+define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) #0 {
+; CHECK-LABEL: @test_vmaxpd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP7]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
+ ret <8 x double> %1
+}
+declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
+
+define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) #0 {
+; CHECK-LABEL: @test_vminpd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP7]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
+ ret <8 x double> %1
+}
+declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
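+
+; Note: test_vmaxpd/test_vminpd above exercise the intrinsics this patch
+; special-cases. Because maybeHandleSimpleNomemIntrinsic is now called with
+; one trailing flag, the shadows of the two vector operands are OR'd
+; ([[_MSPROP]]) and propagated to @__msan_retval_tls instead of being
+; strictly checked; the trailing i32 rounding-mode argument is ignored by
+; MSan.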
+
+define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_store_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP1]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP1]], 1
+; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]]
+; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[MASK]], 1
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP9]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[TMP14]], i32 1, <4 x i1> [[EXTRACT]])
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i1> [[_MSPROP]] to i4
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]]
+; CHECK: 16:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 17:
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[DATA:%.*]], ptr [[PTR]], i32 1, <4 x i1> [[EXTRACT]])
+; CHECK-NEXT: ret void
+;
+ %1 = and i8 %mask, 1
+ %2 = bitcast i8 %1 to <8 x i1>
+ %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr, i32 1, <4 x i1> %extract)
+ ret void
+}
+declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) #1
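+
+; Note: for the masked store above, MSan mirrors the operation on the
+; shadow: the data shadow is written with an equally-masked
+; llvm.masked.store to the shadow address (the pointer XOR'd with the
+; x86-64 shadow-mapping constant 87960930222080), and the pointer shadow
+; plus the shadow of the used mask bits are checked before the application
+; store.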
+
+
+declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
+declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
+declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
+
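+; Note: unlike min/max, the sub/mul (and, further below, add) .512 rounding
+; intrinsics are not special-cased by this patch, so these tests check the
+; pre-existing strict handling: any nonzero operand shadow branches to
+; __msan_warning_noreturn and the return shadow is zeroinitializer.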
+define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vsubps_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
+; CHECK-LABEL: @test_vmulps_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
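+
+; Note: in the masked variants above, the final select gets MSan's usual
+; select shadow: [[_MSPROP_SELECT]] picks between the shadows of the two
+; arms according to the concrete mask, and lanes whose mask *shadow* bit is
+; set conservatively receive the XOR of the two candidate values OR'd with
+; their shadows (the xor/or chain in the CHECK lines).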
+
+define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_passthru_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_passthru_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_passthru_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_vmulps_mask_passthru_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
+ ret <16 x float> %3
+}
+
+define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_vmulpd_mask_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_vmulpd_mask_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_vmulpd_mask_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
+define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_vmulpd_mask_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_add_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_add_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_add_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_add_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_add_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_add_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
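+; The unmasked variants below show the conservative handling MSan applies
+; when it does not model an intrinsic: each vector operand's shadow is
+; collapsed to an i512, any nonzero bit branches to __msan_warning_noreturn,
+; and an all-zero shadow is stored for the return value.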
+define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_add_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_add_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_add_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_add_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_add_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
+}
+declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
+
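+; llvm.x86.avx512.sub.ps.512 gets the same instrumentation as the add tests
+; above; only the intrinsic being wrapped changes.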
+define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_sub_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_sub_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_sub_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_sub_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_sub_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_sub_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_sub_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
+}
+
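+; In the maskz (zero-masking) tests the false arm of the select is the
+; constant zeroinitializer, so its shadow is all zeros and the select-shadow
+; chain folds to xor/or against zeroinitializer in the checks below.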
+define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_div_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_div_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_div_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_div_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_div_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_div_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_div_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_div_round_ps_rn_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_div_round_ps_rd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_div_round_ps_ru_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_div_round_ps_rz_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_div_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
+}
+declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
+
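+; Unlike llvm.x86.avx512.div.ps.512 above, where any uninitialized operand
+; branches to __msan_warning_noreturn and the return shadow is
+; zeroinitializer, min.ps.512 propagates shadow directly: the result shadow
+; is the bitwise OR of the operand shadows (_MSPROP), with no warning branch.
+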
+define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_min_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
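+; For the masked variants, _MSPROP_SELECT is built in two steps: first the
+; shadow the concrete mask would select, then an upper bound on the
+; uncertainty when the mask itself is uninitialized (xor of the two select
+; arms, or'd with their shadows); the mask's shadow picks between the two.
+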
+define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_min_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_min_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_min_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_min_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_min_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
+}
+declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
+
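+; max.ps.512 gets the same shadow propagation as min.ps.512: bitwise OR of
+; the operand shadows, no check-and-warn branch.
+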
+define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_max_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_maskz_max_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_max_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_mask_max_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
+ ret <16 x float> %3
+}
+
+define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_max_round_ps_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
+ ret <16 x float> %1
+}
+
+define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_mm512_max_round_ps_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
+ ret <16 x float> %1
+}
+declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
+
+declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
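+; The scalar mask.add.ss.round tests keep strict handling: every operand
+; shadow (the <4 x i32> shadows bitcast to i128, plus the i8 mask shadow)
+; feeds the _MSCMP/_MSOR chain guarding __msan_warning_noreturn, and the
+; return shadow is zeroinitializer.
+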
+define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 10)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_add_ss_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) #0 {
+; CHECK-LABEL: @test_add_ss_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+ ret <4 x float> %res
+}
+
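+; The memfold tests load a single lane from memory; the shadow address is
+; computed by xor'ing the application address with the x86-64 Linux shadow
+; mask 0x500000000000 (87960930222080).
+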
+define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_ss_current_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %a1.val = load float, ptr %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_add_ss_current_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 11:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 12:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %a1.val = load float, ptr %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
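+; The scalar-double (sd) variants mirror the ss tests above, with <2 x i64>
+; operand shadows bitcast to i128 for the checks.
+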
+define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_rd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_ru(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_rz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_current(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_add_sd_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_add_sd_rn(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
+ ret <2 x double> %res
+}
+
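+; In the memfold variants the scalar operand comes from memory: its shadow is
+; loaded through the shadow mapping (address XOR 0x500000000000), and the lanes
+; built by inserting into undef keep an all-ones (fully uninitialized) shadow.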
+define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_add_sd_current_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %a1.val = load double, ptr %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_add_sd_current_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 11:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 12:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %a1.val = load double, ptr %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
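+; mask.max.*.round takes an i8 mask and an i32 rounding immediate alongside the
+; vector operands, so it does not fit the pointwise same-type pattern; it is
+; instrumented strictly: every operand shadow is checked and the result shadow
+; is stored as fully initialized.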
+declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_ss_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_ss_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) #0 {
+; CHECK-LABEL: @test_max_ss_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_ss_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %a1.val = load float, ptr %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_ss_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 11:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 12:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %a1.val = load float, ptr %a1
+ %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
+ %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
+ %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
+ %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
+ %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_sd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_sd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_max_sd_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_max_sd_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
+; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %a1.val = load double, ptr %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_max_sd_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 11:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 12:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %a1.val = load double, ptr %a1
+ %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
+ %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
+ %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
+ ret <2 x double> %res
+}
+
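+; cvtsi2ss32 is likewise handled strictly: both argument shadows are checked and
+; a clean result shadow is stored. The immediate i32 11 encodes round-toward-zero
+; with exceptions suppressed.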
+define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) #0 {
+; CHECK-LABEL: @test_x86_avx512_cvtsi2ss32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 11)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
+
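+; cvtusi2ss, by contrast, is instrumented element-wise: only the converted
+; scalar's shadow is checked, element 0 of the result shadow is cleared, and the
+; remaining lanes inherit their shadow from %a.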
+define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss(<4 x float> %a, i32 %b) #0 {
+; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 9)
+; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
+; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss_mem(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 9)
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %b = load i32, ptr %ptr
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) #0 {
+; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 4)
+; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
+; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss_mem(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 4)
+; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %b = load i32, ptr %ptr
+ %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<4 x float>> [#uses=1]
+ ret <4 x float> %res
+}
+
+declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
+
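+; vpermi2var is modeled with precise shadow propagation: the result shadow is
+; the bitwise OR of the operand shadows, and no operand check (warning) is
+; emitted for the data or index vectors.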
+declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>)
+
+define <16 x i32> @test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP9]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ ret <16 x i32> %1
+}
+
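+; The masked variants append an IR select on the mask. MSan instruments the
+; select by choosing between the operand shadows and, for lanes whose mask bit
+; is itself uninitialized, widening the shadow to every bit where the two
+; inputs could differ (the xor/or pattern below).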
+define <16 x i32> @test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X1]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>)
+
+define <8 x double> @test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP9]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
+ ret <8 x double> %1
+}
+
+define <8 x double> @test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
+; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP20]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
+ %2 = bitcast <8 x i64> %x1 to <8 x double>
+ %3 = bitcast i8 %x3 to <8 x i1>
+ %4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2
+ ret <8 x double> %4
+}
+
+declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>)
+
+define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP9]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
+ ret <16 x float> %1
+}
+
+define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
+; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP20]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
+ %2 = bitcast <16 x i32> %x1 to <16 x float>
+ %3 = bitcast i16 %x3 to <16 x i1>
+ %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2
+ ret <16 x float> %4
+}
+
+declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>)
+
+define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP4]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
+ ret <8 x i64> %1
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
+ ret <8 x i64> %3
+}
+
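+; The vpermt2var tests below exercise the same llvm.x86.avx512.vpermi2var.*
+; intrinsics with the first two operands swapped; the maskz variants use a
+; zeroinitializer passthrough, so its shadow contribution is zero and several
+; or-with-zero terms appear in the instrumented select shadow.
+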
+define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %x2 = load <16 x i32>, ptr %x2p
+ %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
+}
+
+define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, ptr %x2ptr, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> undef, double [[X2S]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer
+; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]])
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64>
+; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer
+; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer
+; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]]
+; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP23]]
+;
+ %x2s = load double, ptr %x2ptr
+ %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
+ %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
+ %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
+define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ ret <8 x i64> %3
+}
+
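+; Unmasked vpermt2var is instrumented with plain shadow ORs and no warning
+; branch; the masked variant again routes the combined shadow through the
+; mask select.
+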
+define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP4]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
+ ret <16 x i32> %1
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
+ ret <16 x i32> %3
+}
+
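+; mask.scalef.* takes a mask and a rounding-mode immediate; the
+; instrumentation checks the shadows of all operands (including the i8/i16
+; mask when it is non-constant) before the call and clears the return shadow.
+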
+declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
+define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x double> [[X2]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
+define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 10)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x float> [[X2]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
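+; The pmov/pmovs/pmovus truncation tests follow the same strict pattern:
+; every non-constant operand shadow is checked, a nonzero shadow branches to
+; __msan_warning_noreturn, and the stored return shadow is zeroinitializer.
+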
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
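+; For the .mem.512 store variants, the pointer's shadow is checked as well
+; (the leading icmp ne i64 on the first TLS slot) before the intrinsic call;
+; no return shadow is written since the intrinsic returns void.
+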
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES4]]
+;
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES4]]
+;
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i16> [[RES4]]
+;
+ %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res3 = add <8 x i16> %res0, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
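+; The pmov.qd tests below are written as a plain `trunc` rather than an
+; intrinsic call, so MSan can propagate shadow exactly: the <8 x i64>
+; operand shadow is itself truncated to <8 x i32> (the [[_MSPROP]] lines)
+; instead of being strictly checked.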
+define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmov_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
+; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[TMP2]]
+;
+ %1 = trunc <8 x i64> %x0 to <8 x i32>
+ ret <8 x i32> %1
+}
+
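+; Masked merge via `select`: the result shadow is a select over the operand
+; shadows, and an uninitialized mask is handled by OR-ing in the xor of the
+; two data operands (bits where they differ depend on the unknown mask bit);
+; see the [[_MSPROP_SELECT]] pattern below.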
+define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[_MSPROP]], <8 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP4]], [[X1:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i32> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP10]], <8 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP4]], <8 x i32> [[X1]]
+; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[TMP11]]
+;
+ %1 = trunc <8 x i64> %x0 to <8 x i32>
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
+ ret <8 x i32> %3
+}
+
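+; The maskz variant uses the same select pattern with a zeroinitializer
+; pass-through, so the false arm of the shadow select is all zeros.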
+define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[_MSPROP]], <8 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP7]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP9]], <8 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> zeroinitializer
+; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[TMP10]]
+;
+ %1 = trunc <8 x i64> %x0 to <8 x i32>
+ %2 = bitcast i8 %x2 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
+ ret <8 x i32> %3
+}
+
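+; The .mem variants remain intrinsic calls that write to memory. MSan has no
+; dedicated handler for them, so the shadow of every argument (pointer,
+; vector, and mask) is checked strictly before each call; the constant -1
+; mask in the first call needs no check.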
+declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmov_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
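+; Unlike the plain truncation above, the saturating pmovs/pmovus conversions
+; are still intrinsic calls. MSan falls back to its default treatment: check
+; every operand shadow, branch to __msan_warning_noreturn if any bit is set,
+; and store a clean (all-zero) shadow for the return value.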
+declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovs_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32>@test_int_x86_avx512_maskz_pmovs_qd_512(<8 x i64> %x0, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pmovus_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
+ ret <8 x i32> %res
+}
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
+ ret <8 x i32> %res
+}
+
+define <8 x i32>@test_int_x86_avx512_maskz_pmovus_qd_512(<8 x i64> %x0, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_qd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ ret <8 x i32> %res
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64>, i8)
+
+define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
+ call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
+ ret void
+}
+
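+; The d->b and d->w narrowing tests below repeat the same strict-check
+; pattern for <16 x i32> sources; only the element types and the i16 mask
+; width change.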
+declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
+
+define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i8> [[RES4]]
+;
+ %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
+ %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
+ %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i8> %res0, %res1
+ %res4 = add <16 x i8> %res3, %res2
+ ret <16 x i8> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i16> [[RES4]]
+;
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmov_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i16> [[RES4]]
+;
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
+; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i16> [[RES4]]
+;
+ %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res3 = add <16 x i16> %res0, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32>, i16)
+
+define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_mem_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
+; CHECK-NEXT: ret void
+;
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
+ call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
+ ret void
+}
+
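+; For cvt_dq2ps the plain sitofp path gets exact shadow propagation through
+; the select, while the explicit-rounding intrinsic is strictly checked on
+; its vector operand and yields a clean result shadow (the
+; `or ..., zeroinitializer` feeding the fadd's shadow).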
+declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[CVT:%.*]] = sitofp <16 x i32> [[X0:%.*]] to <16 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
+; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %cvt = sitofp <16 x i32> %x0 to <16 x float>
+ %1 = bitcast i16 %x2 to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
+ %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
+ %res2 = fadd <16 x float> %2, %3
+ ret <16 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2dq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
+
+define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0:%.*]], <8 x float> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0]], <8 x float> [[X1]], i8 -1, i32 10)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x float> [[RES2]]
+;
+ %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10)
+ %res2 = fadd <8 x float> %res, %res1
+ ret <8 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2udq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 10)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2dq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0]], <8 x double> [[X1]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2udq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2dq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[CVT:%.*]] = uitofp <16 x i32> [[X0:%.*]] to <16 x float>
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
+; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %cvt = uitofp <16 x i32> %x0 to <16 x float>
+ %1 = bitcast i16 %x2 to <16 x i1>
+ %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
+ %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
+ %res2 = fadd <16 x float> %2, %3
+ ret <16 x float> %res2
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
+
+define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2udq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i32> [[RES2]]
+;
+ %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
+ %res2 = add <8 x i32> %res, %res1
+ ret <8 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2dq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
+
+define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2udq_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
+ %res2 = add <16 x i32> %res, %res1
+ ret <16 x i32> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
+
+define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1) #0 {
+; CHECK-LABEL: @test_getexp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
+ ret <4 x float> %res
+}
+
+define <4 x float> @test_mask_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_getexp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
+; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 8)
+; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES_1]]
+;
+ %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
+ %res.1 = fadd <4 x float> %res0, %res1
+ ret <4 x float> %res.1
+}
+
+define <4 x float> @test_maskz_getexp_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_getexp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
+ ret <4 x float> %res
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
+
+define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_getexp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
+ ret <2 x double> %res
+}
+
+define <2 x double> @test_mask_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_mask_getexp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
+; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 8)
+; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES_1]]
+;
+ %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
+ %res.1 = fadd <2 x double> %res0, %res1
+ ret <2 x double> %res.1
+}
+
+define <2 x double> @test_maskz_getexp_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_maskz_getexp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
+ ret <2 x double> %res
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 5, i8 [[X3:%.*]], i32 8)
+; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i8 [[RES4]]
+;
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+ ret i8 %res4
+}
+
+define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd_all(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 2, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 3, i8 -1, i32 8)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
+; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
+; CHECK: 18:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 19:
+; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 5, i8 [[X3]], i32 8)
+; CHECK-NEXT: [[TMP20:%.*]] = xor i8 [[RES1]], -1
+; CHECK-NEXT: [[TMP21:%.*]] = xor i8 [[RES2]], -1
+; CHECK-NEXT: [[TMP22:%.*]] = and i8 [[TMP20]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = and i8 0, [[TMP21]]
+; CHECK-NEXT: [[TMP24:%.*]] = or i8 0, [[TMP22]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i8 [[TMP24]], [[TMP23]]
+; CHECK-NEXT: [[RES11:%.*]] = or i8 [[RES1]], [[RES2]]
+; CHECK-NEXT: [[TMP26:%.*]] = xor i8 [[RES3]], -1
+; CHECK-NEXT: [[TMP27:%.*]] = xor i8 [[RES4]], -1
+; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP26]], 0
+; CHECK-NEXT: [[TMP29:%.*]] = and i8 0, [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = or i8 0, [[TMP28]]
+; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP30]], [[TMP29]]
+; CHECK-NEXT: [[RES12:%.*]] = or i8 [[RES3]], [[RES4]]
+; CHECK-NEXT: [[TMP32:%.*]] = xor i8 [[RES11]], -1
+; CHECK-NEXT: [[TMP33:%.*]] = xor i8 [[RES12]], -1
+; CHECK-NEXT: [[TMP34:%.*]] = and i8 [[TMP25]], [[TMP31]]
+; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP32]], [[TMP31]]
+; CHECK-NEXT: [[TMP36:%.*]] = and i8 [[TMP25]], [[TMP33]]
+; CHECK-NEXT: [[TMP37:%.*]] = or i8 [[TMP34]], [[TMP35]]
+; CHECK-NEXT: [[TMP38:%.*]] = or i8 [[TMP37]], [[TMP36]]
+; CHECK-NEXT: [[RES13:%.*]] = or i8 [[RES11]], [[RES12]]
+; CHECK-NEXT: store i8 [[TMP38]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i8 [[RES13]]
+;
+ %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
+ %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
+
+ %res11 = or i8 %res1, %res2
+ %res12 = or i8 %res3, %res4
+ %res13 = or i8 %res11, %res12
+ ret i8 %res13
+}
+
+declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
+
+define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 3, i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i8 [[RES2]]
+;
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
+ ret i8 %res2
+}
+
+define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss_all(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 2, i8 -1, i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 3, i8 -1, i32 8)
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
+; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
+; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
+; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
+; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
+; CHECK: 18:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 19:
+; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 5, i8 [[X3]], i32 8)
+; CHECK-NEXT: [[TMP20:%.*]] = and i8 [[RES1]], 0
+; CHECK-NEXT: [[TMP21:%.*]] = and i8 0, [[RES2]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i8 0, [[TMP20]]
+; CHECK-NEXT: [[TMP23:%.*]] = or i8 [[TMP22]], [[TMP21]]
+; CHECK-NEXT: [[RES11:%.*]] = and i8 [[RES1]], [[RES2]]
+; CHECK-NEXT: [[TMP24:%.*]] = and i8 [[RES3]], 0
+; CHECK-NEXT: [[TMP25:%.*]] = and i8 0, [[RES4]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i8 0, [[TMP24]]
+; CHECK-NEXT: [[TMP27:%.*]] = or i8 [[TMP26]], [[TMP25]]
+; CHECK-NEXT: [[RES12:%.*]] = and i8 [[RES3]], [[RES4]]
+; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP23]], [[TMP27]]
+; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[RES11]], [[TMP27]]
+; CHECK-NEXT: [[TMP30:%.*]] = and i8 [[TMP23]], [[RES12]]
+; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP28]], [[TMP29]]
+; CHECK-NEXT: [[TMP32:%.*]] = or i8 [[TMP31]], [[TMP30]]
+; CHECK-NEXT: [[RES13:%.*]] = and i8 [[RES11]], [[RES12]]
+; CHECK-NEXT: store i8 [[TMP32]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i8 [[RES13]]
+;
+ %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
+ %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
+ %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
+ %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
+
+ %res11 = and i8 %res1, %res2
+ %res12 = and i8 %res3, %res4
+ %res13 = and i8 %res11, %res12
+ ret i8 %res13
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0:%.*]], i32 11, <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0]], i32 11, <8 x double> [[X2]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <8 x double> %res, %res1
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0:%.*]], i32 11, <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
+; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
+; CHECK: 10:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 11:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0]], i32 11, <16 x float> [[X2]], i16 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
+ %res2 = fadd <16 x float> %res, %res1
+ ret <16 x float> %res2
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 11, <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 12, <2 x double> zeroinitializer, i8 [[X3]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
+; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 13, <2 x double> [[X2]], i8 [[X3]], i32 8)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
+; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP21:%.*]] = icmp ne i128 [[TMP21]], 0
+; CHECK-NEXT: [[_MSOR22:%.*]] = or i1 [[_MSOR20]], [[_MSCMP21]]
+; CHECK-NEXT: br i1 [[_MSOR22]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 22:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 23:
+; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 14, <2 x double> [[X2]], i8 -1, i32 4)
+; CHECK-NEXT: [[RES11:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES12:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
+; CHECK-NEXT: [[RES13:%.*]] = fadd <2 x double> [[RES11]], [[RES12]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES13]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 12, <2 x double> zeroinitializer, i8 %x3, i32 4)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 13, <2 x double> %x2, i8 %x3, i32 8)
+ %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 14, <2 x double> %x2, i8 -1, i32 4)
+ %res11 = fadd <2 x double> %res, %res1
+ %res12 = fadd <2 x double> %res2, %res3
+ %res13 = fadd <2 x double> %res11, %res12
+ ret <2 x double> %res13
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 11, <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 12, <4 x float> zeroinitializer, i8 [[X3]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 13, <4 x float> [[X2]], i8 -1, i32 8)
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i128 [[TMP19]], 0
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i128 [[TMP20]], 0
+; CHECK-NEXT: [[_MSOR18:%.*]] = or i1 [[_MSCMP16]], [[_MSCMP17]]
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP21]], 0
+; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSOR18]], [[_MSCMP19]]
+; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 22:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 23:
+; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 14, <4 x float> [[X2]], i8 -1, i32 4)
+; CHECK-NEXT: [[RES11:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES12:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
+; CHECK-NEXT: [[RES13:%.*]] = fadd <4 x float> [[RES11]], [[RES12]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES13]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 12, <4 x float> zeroinitializer, i8 %x3, i32 4)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 13, <4 x float> %x2, i8 -1, i32 8)
+ %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 14, <4 x float> %x2, i8 -1, i32 4)
+ %res11 = fadd <4 x float> %res, %res1
+ %res12 = fadd <4 x float> %res2, %res3
+ %res13 = fadd <4 x float> %res11, %res12
+ ret <4 x float> %res13
+}
+
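+; The load variant passes an undef pass-through vector; its shadow is a
+; non-zero constant, so the __msan_warning_noreturn call below is emitted
+; unconditionally, with no branch guarding it.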
+define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, ptr %x1p) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[X1:%.*]] = load <4 x float>, ptr [[X1P:%.*]], align 16
+; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[X1P]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
+; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1]], i32 11, <4 x float> undef, i8 -1, i32 4)
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES]]
+;
+ %x1 = load <4 x float>, ptr %x1p
+ %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4)
+ ret <4 x float> %res
+}
+
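+; vpermilvar takes a runtime control vector, so both vector operands get a
+; strict shadow check before the call.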
+declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
+
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ ret <8 x double> %res
+}
+
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
+ ret <8 x double> %res2
+}
+
+define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES2]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
+ ret <8 x double> %res2
+}
+
+declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+ ret <16 x float> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+ ret <16 x float> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 4:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ ret <16 x float> %res
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
+ ret <16 x float> %res2
+}
+
+define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES2]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
+ ret <16 x float> %res2
+}
+
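+; The cvt*2*.round tests use rounding immediates 4, 8, and 11
+; (11 = _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC).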
+declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32)
+
+define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ss2sd_round(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0:%.*]], <4 x float> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0]], <4 x float> [[X1]], <2 x double> [[X2]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES2]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
+ %res2 = fadd <2 x double> %res, %res1
+ ret <2 x double> %res2
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_sd2ss_round(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0:%.*]], <2 x double> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0]], <2 x double> [[X1]], <4 x float> [[X2]], i8 -1, i32 8)
+; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES2]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
+ %res2 = fadd <4 x float> %res, %res1
+ ret <4 x float> %res2
+}
+
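+; Unlike the tests above, pternlog's trailing i32 is an 8-bit truth-table
+; immediate (33 = 0x21), not a rounding mode.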
+declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
+
+define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pternlog_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP9]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ ret <16 x i32> %1
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> [[TMP1]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X0]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X0]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %2 = bitcast i16 %x4 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
+ ret <16 x i32> %3
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP17]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
+ %2 = bitcast i16 %x4 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
+}
+
+declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
+
+define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_pternlog_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP9]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ ret <8 x i64> %1
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> [[TMP1]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[X0]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[X0]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP17]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
+ ret <8 x i64> %3
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP17]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
+ %2 = bitcast i8 %x4 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ ret <8 x i64> %3
+}
+
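+; For vcomi.sd, the third argument is the comparison predicate (0 = EQ_OQ,
+; 8 = EQ_UQ) and the fourth selects SAE (8) versus the current rounding
+; mode (4).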
+define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_comi_sd_eq_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 8)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 8)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_comi_sd_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_comi_sd_lt_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 8)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt_sae(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 8)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_comi_sd_lt(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
+ ret i32 %res
+}
+
+define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
+ ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
+
+define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_ucomi_ss_lt(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 9, i32 4)
+; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
+ ret i32 %res
+}
+
+declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
+
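+; The FP permvar tests below are handled strictly: both operand shadows
+; are collapsed to i512 and checked in full (any uninitialized bit warns),
+; and the result's shadow is stored as zeroinitializer.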
+declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
+
+define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP7]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ ret <8 x double> %1
+}
+
+define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP18]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
+ ret <8 x double> %3
+}
+
+define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_df_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[TMP16]]
+;
+ %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
+ ret <8 x double> %3
+}
+
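+; By contrast, the integer permvar variants below get approximate bitwise
+; propagation: the result shadow is simply the OR of the two operand
+; shadows (_MSPROP), with no warning branch on the index operand.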
+declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
+
+define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP3]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ ret <8 x i64> %1
+}
+
+define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP12]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
+ ret <8 x i64> %3
+}
+
+define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_di_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[TMP11]]
+;
+ %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
+ %2 = bitcast i8 %x3 to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
+ ret <8 x i64> %3
+}
+
+declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
+
+define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP7]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ ret <16 x float> %1
+}
+
+define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP18]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
+ ret <16 x float> %3
+}
+
+define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_sf_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP16]]
+;
+ %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
+ ret <16 x float> %3
+}
+
+declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
+
+define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP3]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ ret <16 x i32> %1
+}
+
+define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP12]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
+ ret <16 x i32> %3
+}
+
+define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_si_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[TMP11]]
+;
+ %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
+ %2 = bitcast i16 %x3 to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
+ ret <16 x i32> %3
+}
+
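+; The fixupimm tests below are also handled strictly; note that the i8/i16
+; mask operand participates in the shadow check alongside the three vector
+; operands, so an uninitialized mask already triggers a warning.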
+declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 4, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> [[X1]], <8 x i64> [[X2]], i32 5, i8 [[X4]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 3, i8 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES4]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
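+; The _load variant below additionally checks the pointer's shadow before
+; dereferencing, then loads the pointee's shadow through the usual
+; application-to-shadow mapping (xor with 87960930222080).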
+define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, ptr %x2ptr) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[X2:%.*]] = load <8 x i64>, ptr [[X2PTR:%.*]], align 64
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[_MSLD]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2]], i32 3, i8 -1, i32 4)
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %x2 = load <8 x i64>, ptr %x2ptr
+ %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4)
+ ret <8 x double> %res
+}
+
+declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
+
+define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_pd_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 3, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 2, i8 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES4]]
+;
+ %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
+ %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
+ %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8)
+ %res3 = fadd <8 x double> %res, %res1
+ %res4 = fadd <8 x double> %res3, %res2
+ ret <8 x double> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 5, i8 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res3, %res2
+ ret <4 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
+
+define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 6, i8 -1, i32 4)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
+ %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
+ %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 6, i8 -1, i32 4)
+ %res3 = fadd <4 x float> %res, %res1
+ %res4 = fadd <4 x float> %res3, %res2
+ ret <4 x float> %res4
+}
+
+declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
+
+define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 5, i16 [[X4]], i32 4)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 5, i16 -1, i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES4]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res3, %res2
+ ret <16 x float> %res4
+}
+
+define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, ptr %x2ptr) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512_load(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2PTR:%.*]], align 64
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2]], i32 5, i16 -1, i32 4)
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %x2 = load <16 x i32>, ptr %x2ptr
+ %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4)
+ ret <16 x float> %res
+}
+
+declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
+
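+; In the maskz test the shadows of all three vector operands and of the
+; i16 mask %x4 are OR-combined into one strict check per call; the
+; immediate (i32 5/6/7) and the rounding mode (i32 4/8) are constants and
+; contribute no shadow.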
+define <16 x float> @test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ps_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 6, i16 [[X4]], i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 7, i16 -1, i32 4)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES4]]
+;
+ %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
+ %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 6, i16 %x4, i32 8)
+ %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 7, i16 -1, i32 4)
+ %res3 = fadd <16 x float> %res, %res1
+ %res4 = fadd <16 x float> %res3, %res2
+ ret <16 x float> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
+
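+; Scalar double variant: same pattern as the packed tests, with the
+; operand shadows bitcast from <2 x i64> to i128 and an i8 mask shadow.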
+define <2 x double> @test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 6, i8 -1, i32 4)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 6, i8 -1, i32 4)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
+
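+; All three calls below pass the variable mask %x4, so even the last
+; call's check chain should include the mask shadow (_MSCMP16), unlike
+; the constant-mask (i16 -1 / i8 -1) calls in the preceding tests.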
+define <2 x double> @test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
+; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
+; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
+; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
+; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
+; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
+; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
+; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
+; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
+; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
+; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
+; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
+; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 17:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 18:
+; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 5, i8 [[X4]], i32 8)
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
+ %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
+ %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8)
+ %res3 = fadd <2 x double> %res, %res1
+ %res4 = fadd <2 x double> %res3, %res2
+ ret <2 x double> %res4
+}
+
+declare double @llvm.fma.f64(double, double, double) #1
+declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #0
+
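+; The vfmadd tests mix two lowerings: the round-to-nearest case is plain
+; @llvm.fma.f64, whose shadow propagates bitwise (the OR of the three
+; lane shadows feeds the masked-select logic), while the explicit-rounding
+; @llvm.x86.avx512.vfmadd.f64 calls (i32 10/11) get a strict check on all
+; operand shadows and produce a clean result shadow.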
+define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP5]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP5]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X0]], double [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP11]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP27]] to i64
+; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP11]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP27]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X0]], double [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %1 = extractelement <2 x double> %x0, i64 0
+ %2 = extractelement <2 x double> %x1, i64 0
+ %3 = extractelement <2 x double> %x2, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, double %4, double %1
+ %8 = insertelement <2 x double> %x0, double %7, i64 0
+ %9 = extractelement <2 x double> %x0, i64 0
+ %10 = extractelement <2 x double> %x1, i64 0
+ %11 = extractelement <2 x double> %x2, i64 0
+ %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
+ %13 = insertelement <2 x double> %x0, double %12, i64 0
+ %14 = extractelement <2 x double> %x0, i64 0
+ %15 = extractelement <2 x double> %x1, i64 0
+ %16 = extractelement <2 x double> %x2, i64 0
+ %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
+ %18 = bitcast i8 %x3 to <8 x i1>
+ %19 = extractelement <8 x i1> %18, i64 0
+ %20 = select i1 %19, double %17, double %14
+ %21 = insertelement <2 x double> %x0, double %20, i64 0
+ %res3 = fadd <2 x double> %8, %13
+ %res4 = fadd <2 x double> %21, %res3
+ ret <2 x double> %res4
+}
+
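+; Single-precision counterpart of the test above: identical shadow flow
+; with <4 x float> operands and i32 lane shadows.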
+define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP5]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP5]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP11]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP27]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP11]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP27]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X0]], float [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %x1, i64 0
+ %3 = extractelement <4 x float> %x2, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float %1
+ %8 = insertelement <4 x float> %x0, float %7, i64 0
+ %9 = extractelement <4 x float> %x0, i64 0
+ %10 = extractelement <4 x float> %x1, i64 0
+ %11 = extractelement <4 x float> %x2, i64 0
+ %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
+ %13 = insertelement <4 x float> %x0, float %12, i64 0
+ %14 = extractelement <4 x float> %x0, i64 0
+ %15 = extractelement <4 x float> %x1, i64 0
+ %16 = extractelement <4 x float> %x2, i64 0
+ %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
+ %18 = bitcast i8 %x3 to <8 x i1>
+ %19 = extractelement <8 x i1> %18, i64 0
+ %20 = select i1 %19, float %17, float %14
+ %21 = insertelement <4 x float> %x0, float %20, i64 0
+ %res3 = fadd <4 x float> %8, %13
+ %res4 = fadd <4 x float> %21, %res3
+ ret <4 x float> %res4
+}
+
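+; Note: the function below lacks the sanitize_memory attribute (#0), so
+; MSan treats its parameters as fully initialized: there are no TLS shadow
+; loads and the shadow logic reduces to constants (e.g. `select i1 false`
+; and a zeroinitializer retval shadow).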
+define <2 x double> @test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_sd(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 0, i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast double [[TMP4]] to i64
+; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP10]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i64 [[TMP11]], i64 [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> [[X0]], double [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]], i32 11)
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 0, i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast double [[TMP17]] to i64
+; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select i1 false, i64 [[TMP24]], i64 [[TMP20]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], double [[TMP17]], double 0.000000e+00
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
+; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[TMP13]], [[TMP26]]
+; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES2]]
+;
+ %1 = extractelement <2 x double> %x0, i64 0
+ %2 = extractelement <2 x double> %x1, i64 0
+ %3 = extractelement <2 x double> %x2, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, double %4, double 0.000000e+00
+ %8 = insertelement <2 x double> %x0, double %7, i64 0
+ %9 = extractelement <2 x double> %x0, i64 0
+ %10 = extractelement <2 x double> %x1, i64 0
+ %11 = extractelement <2 x double> %x2, i64 0
+ %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
+ %13 = bitcast i8 %x3 to <8 x i1>
+ %14 = extractelement <8 x i1> %13, i64 0
+ %15 = select i1 %14, double %12, double 0.000000e+00
+ %16 = insertelement <2 x double> %x0, double %15, i64 0
+ %res2 = fadd <2 x double> %8, %16
+ ret <2 x double> %res2
+}
+
+declare float @llvm.fma.f32(float, float, float) #1
+declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #0
+
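+; As above, the single-precision maskz test also omits #0 and therefore
+; carries only constant shadows.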
+define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 0, i32 0
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast float [[TMP4]] to i32
+; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP8]], 0
+; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], 0
+; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP11]], i32 [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[X0]], float [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]], i32 11)
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 0, i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select i1 false, i32 [[TMP24]], i32 [[TMP20]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP17]], float 0.000000e+00
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
+; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[TMP13]], [[TMP26]]
+; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES2]]
+;
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %x1, i64 0
+ %3 = extractelement <4 x float> %x2, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float 0.000000e+00
+ %8 = insertelement <4 x float> %x0, float %7, i64 0
+ %9 = extractelement <4 x float> %x0, i64 0
+ %10 = extractelement <4 x float> %x1, i64 0
+ %11 = extractelement <4 x float> %x2, i64 0
+ %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
+ %13 = bitcast i8 %x3 to <8 x i1>
+ %14 = extractelement <8 x i1> %13, i64 0
+ %15 = select i1 %14, float %12, float 0.000000e+00
+ %16 = insertelement <4 x float> %x0, float %15, i64 0
+ %res2 = fadd <4 x float> %8, %16
+ ret <4 x float> %res2
+}
+
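+; Here the first vector operand is loaded through a readonly pointer: the
+; pointer shadow is checked before the load, and the loaded shadow (_MSLD)
+; seeds the lane-0 propagation through @llvm.fma.f32.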
+define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr nocapture readonly %1, float %2, float %3) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_load0(
+; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
+; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 16
+; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP1]] to i64
+; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
+; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP6]]
+; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP7]]
+; CHECK-NEXT: [[TMP16:%.*]] = tail call float @llvm.fma.f32(float [[TMP15]], float [[TMP2:%.*]], float [[TMP3:%.*]])
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP0:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <8 x i1> [[TMP17]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[_MSPROP2]], i32 0
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP16]] to i32
+; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0
+; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP3]], i32 [[TMP24]], i32 [[TMP20]]
+; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP16]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <4 x i32> [[_MSLD]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP25]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP26]]
+;
+ %5 = load <4 x float>, ptr %1, align 16
+ %6 = extractelement <4 x float> %5, i64 0
+ %7 = tail call float @llvm.fma.f32(float %6, float %2, float %3) #2
+ %8 = bitcast i8 %0 to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %7, float 0.000000e+00
+ %11 = insertelement <4 x float> %5, float %10, i64 0
+ ret <4 x float> %11
+}
+
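+; The mask3 variants write the result back into lane 0 of %x2, so the
+; select's pass-through arm should carry the shadow of %x2's element
+; (_MSPROP2) rather than %x0's.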
+define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP7]] to i64
+; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP7]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X2]], double [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X2]], double [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP13]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP29]] to i64
+; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP13]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP29]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X2]], double [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %1 = extractelement <2 x double> %x0, i64 0
+ %2 = extractelement <2 x double> %x1, i64 0
+ %3 = extractelement <2 x double> %x2, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, double %4, double %3
+ %8 = insertelement <2 x double> %x2, double %7, i64 0
+ %9 = extractelement <2 x double> %x0, i64 0
+ %10 = extractelement <2 x double> %x1, i64 0
+ %11 = extractelement <2 x double> %x2, i64 0
+ %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
+ %13 = insertelement <2 x double> %x2, double %12, i64 0
+ %14 = extractelement <2 x double> %x0, i64 0
+ %15 = extractelement <2 x double> %x1, i64 0
+ %16 = extractelement <2 x double> %x2, i64 0
+ %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
+ %18 = bitcast i8 %x3 to <8 x i1>
+ %19 = extractelement <8 x i1> %18, i64 0
+ %20 = select i1 %19, double %17, double %16
+ %21 = insertelement <2 x double> %x2, double %20, i64 0
+ %res3 = fadd <2 x double> %8, %13
+ %res4 = fadd <2 x double> %21, %res3
+ ret <2 x double> %res4
+}
+
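+; Single-precision mask3 counterpart; same shadow flow with i32 lanes.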
+define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP2]]
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP7]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP7]]
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X2]], float [[TMP18]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
+; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
+; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
+; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
+; CHECK: 23:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 24:
+; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
+; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP3]], i32 0, i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X2]], float [[TMP25]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
+; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
+; CHECK: 30:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 31:
+; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
+; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP13]]
+; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
+; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP29]] to i32
+; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
+; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP13]]
+; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP29]]
+; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT15]], i64 0
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X2]], float [[TMP42]], i64 0
+; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
+; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %x1, i64 0
+ %3 = extractelement <4 x float> %x2, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float %3
+ %8 = insertelement <4 x float> %x2, float %7, i64 0
+ %9 = extractelement <4 x float> %x0, i64 0
+ %10 = extractelement <4 x float> %x1, i64 0
+ %11 = extractelement <4 x float> %x2, i64 0
+ %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
+ %13 = insertelement <4 x float> %x2, float %12, i64 0
+ %14 = extractelement <4 x float> %x0, i64 0
+ %15 = extractelement <4 x float> %x1, i64 0
+ %16 = extractelement <4 x float> %x2, i64 0
+ %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
+ %18 = bitcast i8 %x3 to <8 x i1>
+ %19 = extractelement <8 x i1> %18, i64 0
+ %20 = select i1 %19, float %17, float %16
+ %21 = insertelement <4 x float> %x2, float %20, i64 0
+ %res3 = fadd <4 x float> %8, %13
+ %res4 = fadd <4 x float> %21, %res3
+ ret <4 x float> %res4
+}
+
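+; memfold: the vector is built from one loaded scalar plus zero constants,
+; so its shadow starts as splat(-1) for the undef vector, takes _MSLD in
+; lane 0, is cleared lane by lane as the zeros are inserted, and the final
+; lane-0 shadow is stored back through the shadow mapping of %a.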
+define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) #0 {
+; CHECK-LABEL: @fmadd_ss_mask_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
+; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
+; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
+; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 [[_MSPROP9]]
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP14]] to i32
+; CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], [[_MSPROP13]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP25]], [[_MSPROP9]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP26]], i32 [[TMP21]]
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], float [[TMP17]], float [[TMP14]]
+; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> [[AV]], float [[TMP27]], i64 0
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
+; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP28]], i32 0
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
+; CHECK: 29:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 30:
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
+; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP33]], align 4
+; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4
+; CHECK-NEXT: ret void
+;
+ %a.val = load float, ptr %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, ptr %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+ %1 = extractelement <4 x float> %av, i64 0
+ %2 = extractelement <4 x float> %bv, i64 0
+ %3 = extractelement <4 x float> %av, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %c to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float %1
+ %8 = insertelement <4 x float> %av, float %7, i64 0
+ %sr = extractelement <4 x float> %8, i32 0
+ store float %sr, ptr %a
+ ret void
+}
+
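+; Zero-masking variant of the test above: when the mask bit is clear, the result
+; is the constant 0.0, so the select's false shadow is fully initialized.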
+define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 {
+; CHECK-LABEL: @fmadd_ss_maskz_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
+; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
+; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
+; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
+; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
+; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
+; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
+; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
+; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
+; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP13]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP25]], i32 [[TMP21]]
+; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], float [[TMP17]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[AV]], float [[TMP26]], i64 0
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
+; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP27]], i32 0
+; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
+; CHECK: 28:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 29:
+; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
+; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
+; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP32]], align 4
+; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4
+; CHECK-NEXT: ret void
+;
+ %a.val = load float, ptr %a
+ %av0 = insertelement <4 x float> undef, float %a.val, i32 0
+ %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
+ %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
+ %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
+
+ %b.val = load float, ptr %b
+ %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
+ %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
+ %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
+ %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
+ %1 = extractelement <4 x float> %av, i64 0
+ %2 = extractelement <4 x float> %bv, i64 0
+ %3 = extractelement <4 x float> %av, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %c to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float 0.000000e+00
+ %8 = insertelement <4 x float> %av, float %7, i64 0
+ %sr = extractelement <4 x float> %8, i32 0
+ store float %sr, ptr %a
+ ret void
+}
+
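+; Double-precision counterpart of @fmadd_ss_mask_memfold.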
+define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) #0 {
+; CHECK-LABEL: @fmadd_sd_mask_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
+; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
+; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
+; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 [[_MSPROP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
+; CHECK-NEXT: [[TMP23:%.*]] = bitcast double [[TMP14]] to i64
+; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], [[_MSPROP9]]
+; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP25]], [[_MSPROP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP26]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], double [[TMP17]], double [[TMP14]]
+; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[AV]], double [[TMP27]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
+; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP28]], i32 0
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
+; CHECK: 29:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 30:
+; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
+; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
+; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP33]], align 8
+; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8
+; CHECK-NEXT: ret void
+;
+ %a.val = load double, ptr %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, ptr %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+ %1 = extractelement <2 x double> %av, i64 0
+ %2 = extractelement <2 x double> %bv, i64 0
+ %3 = extractelement <2 x double> %av, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = bitcast i8 %c to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, double %4, double %1
+ %8 = insertelement <2 x double> %av, double %7, i64 0
+ %sr = extractelement <2 x double> %8, i32 0
+ store double %sr, ptr %a
+ ret void
+}
+
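+; Double-precision counterpart of @fmadd_ss_maskz_memfold.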
+define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 {
+; CHECK-LABEL: @fmadd_sd_maskz_memfold(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
+; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
+; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
+; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 10:
+; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
+; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
+; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
+; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
+; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
+; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0
+; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0
+; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
+; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
+; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
+; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
+; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
+; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
+; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 0
+; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP9]]
+; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP25]], i64 [[TMP21]]
+; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], double [[TMP17]], double 0.000000e+00
+; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> [[AV]], double [[TMP26]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
+; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP27]], i32 0
+; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
+; CHECK: 28:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 29:
+; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
+; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
+; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
+; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP32]], align 8
+; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8
+; CHECK-NEXT: ret void
+;
+ %a.val = load double, ptr %a
+ %av0 = insertelement <2 x double> undef, double %a.val, i32 0
+ %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
+
+ %b.val = load double, ptr %b
+ %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
+ %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
+ %1 = extractelement <2 x double> %av, i64 0
+ %2 = extractelement <2 x double> %bv, i64 0
+ %3 = extractelement <2 x double> %av, i64 0
+ %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
+ %5 = bitcast i8 %c to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, double %4, double 0.000000e+00
+ %8 = insertelement <2 x double> %av, double %7, i64 0
+ %sr = extractelement <2 x double> %8, i32 0
+ store double %sr, ptr %a
+ ret void
+}
+
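+; mask3 scalar fmsub (double): the addend is negated and fed to an FMA, tested
+; with plain llvm.fma.f64 and with llvm.x86.avx512.vfmadd.f64 under two explicit
+; rounding modes.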
+define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X2:%.*]]
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP6]], double [[TMP7]], double [[TMP8]])
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast double [[TMP9]] to i64
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
+; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP19]], i64 [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], double [[TMP9]], double [[TMP10]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> [[X2]], double [[TMP20]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = fneg <2 x double> [[X2]]
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP22]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
+; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK: 26:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 27:
+; CHECK-NEXT: [[TMP28:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP23]], double [[TMP24]], double [[TMP25]], i32 11)
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x double> [[X2]], double [[TMP28]], i64 0
+; CHECK-NEXT: [[TMP31:%.*]] = fneg <2 x double> [[X2]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP31]], i64 0
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
+; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
+; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
+; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
+; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
+; CHECK: 35:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 36:
+; CHECK-NEXT: [[TMP37:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP32]], double [[TMP33]], double [[TMP34]], i32 10)
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i64 0, i64 [[_MSPROP16]]
+; CHECK-NEXT: [[TMP43:%.*]] = bitcast double [[TMP37]] to i64
+; CHECK-NEXT: [[TMP44:%.*]] = bitcast double [[TMP38]] to i64
+; CHECK-NEXT: [[TMP45:%.*]] = xor i64 [[TMP43]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP45]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[_MSPROP16]]
+; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP47]], i64 [[TMP42]]
+; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], double [[TMP37]], double [[TMP38]]
+; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT18]], i64 0
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x double> [[X2]], double [[TMP48]], i64 0
+; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP21]], [[TMP30]]
+; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP49]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %1 = fneg <2 x double> %x2
+ %2 = extractelement <2 x double> %x0, i64 0
+ %3 = extractelement <2 x double> %x1, i64 0
+ %4 = extractelement <2 x double> %1, i64 0
+ %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
+ %6 = extractelement <2 x double> %x2, i64 0
+ %7 = bitcast i8 %x3 to <8 x i1>
+ %8 = extractelement <8 x i1> %7, i64 0
+ %9 = select i1 %8, double %5, double %6
+ %10 = insertelement <2 x double> %x2, double %9, i64 0
+ %11 = fneg <2 x double> %x2
+ %12 = extractelement <2 x double> %x0, i64 0
+ %13 = extractelement <2 x double> %x1, i64 0
+ %14 = extractelement <2 x double> %11, i64 0
+ %15 = call double @llvm.x86.avx512.vfmadd.f64(double %12, double %13, double %14, i32 11)
+ %16 = extractelement <2 x double> %x2, i64 0
+ %17 = insertelement <2 x double> %x2, double %15, i64 0
+ %18 = fneg <2 x double> %x2
+ %19 = extractelement <2 x double> %x0, i64 0
+ %20 = extractelement <2 x double> %x1, i64 0
+ %21 = extractelement <2 x double> %18, i64 0
+ %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 10)
+ %23 = extractelement <2 x double> %x2, i64 0
+ %24 = bitcast i8 %x3 to <8 x i1>
+ %25 = extractelement <8 x i1> %24, i64 0
+ %26 = select i1 %25, double %22, double %23
+ %27 = insertelement <2 x double> %x2, double %26, i64 0
+ %res3 = fadd <2 x double> %10, %17
+ %res4 = fadd <2 x double> %27, %res3
+ ret <2 x double> %res4
+}
+
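+; Single-precision counterpart of the mask3 vfmsub test above.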
+define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X2:%.*]]
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.fma.f32(float [[TMP6]], float [[TMP7]], float [[TMP8]])
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
+; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast float [[TMP9]] to i32
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = xor i32 [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP17]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP19]], i32 [[TMP14]]
+; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], float [[TMP9]], float [[TMP10]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[X2]], float [[TMP20]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = fneg <4 x float> [[X2]]
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP22]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
+; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK: 26:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 27:
+; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP23]], float [[TMP24]], float [[TMP25]], i32 11)
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
+; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[X2]], float [[TMP28]], i64 0
+; CHECK-NEXT: [[TMP31:%.*]] = fneg <4 x float> [[X2]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x float> [[X0]], i64 0
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x float> [[TMP31]], i64 0
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
+; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
+; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
+; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
+; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
+; CHECK: 35:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 36:
+; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP32]], float [[TMP33]], float [[TMP34]], i32 10)
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
+; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 0, i32 [[_MSPROP16]]
+; CHECK-NEXT: [[TMP43:%.*]] = bitcast float [[TMP37]] to i32
+; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[TMP38]] to i32
+; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP43]], [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], 0
+; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP46]], [[_MSPROP16]]
+; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP47]], i32 [[TMP42]]
+; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], float [[TMP37]], float [[TMP38]]
+; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT18]], i64 0
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[X2]], float [[TMP48]], i64 0
+; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP21]], [[TMP30]]
+; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP49]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %1 = fneg <4 x float> %x2
+ %2 = extractelement <4 x float> %x0, i64 0
+ %3 = extractelement <4 x float> %x1, i64 0
+ %4 = extractelement <4 x float> %1, i64 0
+ %5 = call float @llvm.fma.f32(float %2, float %3, float %4)
+ %6 = extractelement <4 x float> %x2, i64 0
+ %7 = bitcast i8 %x3 to <8 x i1>
+ %8 = extractelement <8 x i1> %7, i64 0
+ %9 = select i1 %8, float %5, float %6
+ %10 = insertelement <4 x float> %x2, float %9, i64 0
+ %11 = fneg <4 x float> %x2
+ %12 = extractelement <4 x float> %x0, i64 0
+ %13 = extractelement <4 x float> %x1, i64 0
+ %14 = extractelement <4 x float> %11, i64 0
+ %15 = call float @llvm.x86.avx512.vfmadd.f32(float %12, float %13, float %14, i32 11)
+ %16 = extractelement <4 x float> %x2, i64 0
+ %17 = insertelement <4 x float> %x2, float %15, i64 0
+ %18 = fneg <4 x float> %x2
+ %19 = extractelement <4 x float> %x0, i64 0
+ %20 = extractelement <4 x float> %x1, i64 0
+ %21 = extractelement <4 x float> %18, i64 0
+ %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 10)
+ %23 = extractelement <4 x float> %x2, i64 0
+ %24 = bitcast i8 %x3 to <8 x i1>
+ %25 = extractelement <8 x i1> %24, i64 0
+ %26 = select i1 %25, float %22, float %23
+ %27 = insertelement <4 x float> %x2, float %26, i64 0
+ %res3 = fadd <4 x float> %10, %17
+ %res4 = fadd <4 x float> %27, %res3
+ ret <4 x float> %res4
+}
+
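+; mask3 scalar fnmsub (double): both the first multiplicand and the addend are
+; negated before the FMA, again covering llvm.fma.f64 and two explicit rounding
+; modes of llvm.x86.avx512.vfmadd.f64.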
+define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X0:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = fneg <2 x double> [[X2:%.*]]
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP9]])
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast double [[TMP11]] to i64
+; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP20]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], double [[TMP10]], double [[TMP11]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[X2]], double [[TMP21]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = fneg <2 x double> [[X0]]
+; CHECK-NEXT: [[TMP24:%.*]] = fneg <2 x double> [[X2]]
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP23]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[TMP24]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
+; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
+; CHECK: 28:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 29:
+; CHECK-NEXT: [[TMP30:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP25]], double [[TMP26]], double [[TMP27]], i32 11)
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP2]], i64 0, i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[X2]], double [[TMP30]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = fneg <2 x double> [[X0]]
+; CHECK-NEXT: [[TMP34:%.*]] = fneg <2 x double> [[X2]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i64 0
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP34]], i64 0
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
+; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
+; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
+; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
+; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
+; CHECK: 38:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 39:
+; CHECK-NEXT: [[TMP40:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP35]], double [[TMP36]], double [[TMP37]], i32 10)
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[X2]], i64 0
+; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
+; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
+; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 [[_MSPROP16]]
+; CHECK-NEXT: [[TMP46:%.*]] = bitcast double [[TMP40]] to i64
+; CHECK-NEXT: [[TMP47:%.*]] = bitcast double [[TMP41]] to i64
+; CHECK-NEXT: [[TMP48:%.*]] = xor i64 [[TMP46]], [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = or i64 [[TMP48]], 0
+; CHECK-NEXT: [[TMP50:%.*]] = or i64 [[TMP49]], [[_MSPROP16]]
+; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP50]], i64 [[TMP45]]
+; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], double [[TMP40]], double [[TMP41]]
+; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT18]], i64 0
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x double> [[X2]], double [[TMP51]], i64 0
+; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP22]], [[TMP32]]
+; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP52]], [[RES3]]
+; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <2 x double> [[RES4]]
+;
+ %1 = fneg <2 x double> %x0
+ %2 = fneg <2 x double> %x2
+ %3 = extractelement <2 x double> %1, i64 0
+ %4 = extractelement <2 x double> %x1, i64 0
+ %5 = extractelement <2 x double> %2, i64 0
+ %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
+ %7 = extractelement <2 x double> %x2, i64 0
+ %8 = bitcast i8 %x3 to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, double %6, double %7
+ %11 = insertelement <2 x double> %x2, double %10, i64 0
+ %12 = fneg <2 x double> %x0
+ %13 = fneg <2 x double> %x2
+ %14 = extractelement <2 x double> %12, i64 0
+ %15 = extractelement <2 x double> %x1, i64 0
+ %16 = extractelement <2 x double> %13, i64 0
+ %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
+ %18 = extractelement <2 x double> %x2, i64 0
+ %19 = insertelement <2 x double> %x2, double %17, i64 0
+ %20 = fneg <2 x double> %x0
+ %21 = fneg <2 x double> %x2
+ %22 = extractelement <2 x double> %20, i64 0
+ %23 = extractelement <2 x double> %x1, i64 0
+ %24 = extractelement <2 x double> %21, i64 0
+ %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 10)
+ %26 = extractelement <2 x double> %x2, i64 0
+ %27 = bitcast i8 %x3 to <8 x i1>
+ %28 = extractelement <8 x i1> %27, i64 0
+ %29 = select i1 %28, double %25, double %26
+ %30 = insertelement <2 x double> %x2, double %29, i64 0
+ %res3 = fadd <2 x double> %11, %19
+ %res4 = fadd <2 x double> %30, %res3
+ ret <2 x double> %res4
+}
+
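+; Single-precision counterpart of the mask3 vfnmsub test above.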
+define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X0:%.*]]
+; CHECK-NEXT: [[TMP6:%.*]] = fneg <4 x float> [[X2:%.*]]
+; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
+; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.fma.f32(float [[TMP7]], float [[TMP8]], float [[TMP9]])
+; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
+; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
+; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP11]] to i32
+; CHECK-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP4]]
+; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP20]], i32 [[TMP15]]
+; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], float [[TMP10]], float [[TMP11]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[X2]], float [[TMP21]], i64 0
+; CHECK-NEXT: [[TMP23:%.*]] = fneg <4 x float> [[X0]]
+; CHECK-NEXT: [[TMP24:%.*]] = fneg <4 x float> [[X2]]
+; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP23]], i64 0
+; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[TMP24]], i64 0
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
+; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
+; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
+; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
+; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
+; CHECK: 28:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 29:
+; CHECK-NEXT: [[TMP30:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP25]], float [[TMP26]], float [[TMP27]], i32 11)
+; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i64 0
+; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[X2]], float [[TMP30]], i64 0
+; CHECK-NEXT: [[TMP33:%.*]] = fneg <4 x float> [[X0]]
+; CHECK-NEXT: [[TMP34:%.*]] = fneg <4 x float> [[X2]]
+; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i64 0
+; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x float> [[X1]], i64 0
+; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x float> [[TMP34]], i64 0
+; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
+; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
+; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
+; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
+; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
+; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
+; CHECK: 38:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 39:
+; CHECK-NEXT: [[TMP40:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP35]], float [[TMP36]], float [[TMP37]], i32 10)
+; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[X2]], i64 0
+; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
+; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
+; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 0, i32 [[_MSPROP16]]
+; CHECK-NEXT: [[TMP46:%.*]] = bitcast float [[TMP40]] to i32
+; CHECK-NEXT: [[TMP47:%.*]] = bitcast float [[TMP41]] to i32
+; CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP46]], [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP48]], 0
+; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[_MSPROP16]]
+; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP50]], i32 [[TMP45]]
+; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], float [[TMP40]], float [[TMP41]]
+; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT18]], i64 0
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[X2]], float [[TMP51]], i64 0
+; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
+; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP22]], [[TMP32]]
+; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
+; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP52]], [[RES3]]
+; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[RES4]]
+;
+ %1 = fneg <4 x float> %x0
+ %2 = fneg <4 x float> %x2
+ %3 = extractelement <4 x float> %1, i64 0
+ %4 = extractelement <4 x float> %x1, i64 0
+ %5 = extractelement <4 x float> %2, i64 0
+ %6 = call float @llvm.fma.f32(float %3, float %4, float %5)
+ %7 = extractelement <4 x float> %x2, i64 0
+ %8 = bitcast i8 %x3 to <8 x i1>
+ %9 = extractelement <8 x i1> %8, i64 0
+ %10 = select i1 %9, float %6, float %7
+ %11 = insertelement <4 x float> %x2, float %10, i64 0
+ %12 = fneg <4 x float> %x0
+ %13 = fneg <4 x float> %x2
+ %14 = extractelement <4 x float> %12, i64 0
+ %15 = extractelement <4 x float> %x1, i64 0
+ %16 = extractelement <4 x float> %13, i64 0
+ %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
+ %18 = extractelement <4 x float> %x2, i64 0
+ %19 = insertelement <4 x float> %x2, float %17, i64 0
+ %20 = fneg <4 x float> %x0
+ %21 = fneg <4 x float> %x2
+ %22 = extractelement <4 x float> %20, i64 0
+ %23 = extractelement <4 x float> %x1, i64 0
+ %24 = extractelement <4 x float> %21, i64 0
+ %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 10)
+ %26 = extractelement <4 x float> %x2, i64 0
+ %27 = bitcast i8 %x3 to <8 x i1>
+ %28 = extractelement <8 x i1> %27, i64 0
+ %29 = select i1 %28, float %25, float %26
+ %30 = insertelement <4 x float> %x2, float %29, i64 0
+ %res3 = fadd <4 x float> %11, %19
+ %res4 = fadd <4 x float> %30, %res3
+ ret <4 x float> %res4
+}
+
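+; mask3 scalar fmadd with the second multiplicand folded from memory: the shadow
+; of the loaded scalar must propagate into the mask select.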
+define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss_rm(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP3]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
+; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP12]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X1]], float [[TMP23]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP24]]
+;
+ %q = load float, ptr %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %vecinit.i, i64 0
+ %3 = extractelement <4 x float> %x1, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float %3
+ %8 = insertelement <4 x float> %x1, float %7, i64 0
+ ret <4 x float> %8
+}
+
+define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss_rm(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
+; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
+; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
+; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
+; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
+; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
+; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP1]]
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP10]] to i32
+; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
+; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP1]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
+; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP10]]
+; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X0]], float [[TMP23]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP24]]
+;
+ %q = load float, ptr %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %vecinit.i, i64 0
+ %3 = extractelement <4 x float> %x1, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = bitcast i8 %x3 to <8 x i1>
+ %6 = extractelement <8 x i1> %5, i64 0
+ %7 = select i1 %6, float %4, float %1
+ %8 = insertelement <4 x float> %x0, float %7, i64 0
+ ret <4 x float> %8
+}
+
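+; In the maskz variant the source IR selects on the constant false, so the
+; shadow select likewise folds to 'select i1 false' forms with a zero
+; passthrough shadow.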
+define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
+; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_rm(
+; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 5:
+; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
+; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
+; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
+; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
+; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
+; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
+; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
+; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
+; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
+; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.fma.f32(float [[TMP9]], float [[TMP10]], float [[TMP11]])
+; CHECK-NEXT: [[TMP13:%.*]] = select i1 false, i32 [[_MSPROP5]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP12]] to i32
+; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP14]], 0
+; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP5]]
+; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 0
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP17]], i32 [[TMP13]]
+; CHECK-NEXT: [[TMP18:%.*]] = select i1 false, float [[TMP12]], float 0.000000e+00
+; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
+; CHECK-NEXT: store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <4 x float> [[TMP19]]
+;
+ %q = load float, ptr %ptr_b
+ %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
+ %1 = extractelement <4 x float> %x0, i64 0
+ %2 = extractelement <4 x float> %x1, i64 0
+ %3 = extractelement <4 x float> %vecinit.i, i64 0
+ %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
+ %5 = select i1 false, float %4, float 0.000000e+00
+ %6 = insertelement <4 x float> %x0, float %5, i64 0
+ ret <4 x float> %6
+}
+
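+; Vector shift tests (psll/psra/psrl). The expected instrumentation shifts the
+; first operand's shadow by the same, concrete count, then ORs in an
+; all-or-nothing term for the count operand: the count's shadow is truncated
+; to the low 64 bits (the only part the hardware shift consults), compared
+; against zero, and the result sign-extended across all 512 bits, so any
+; uninitialized bit in the count poisons every lane.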
+define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psll_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
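+; The mask/maskz variants also exercise the shadow-select pattern: lanes are
+; chosen by the concrete mask, and for lanes whose mask bit is itself
+; uninitialized the shadow widens to (RES ^ PASSTHRU) | both operand shadows,
+; i.e. a lane stays clean only if the two arms agree bit-for-bit and are both
+; initialized.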
+define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psll_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psll_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
+
+
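+; Same checks for the 64-bit element form: <8 x i64> data, <2 x i64> count,
+; i8 masks.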
+define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psll_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psll_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psll_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) nounwind readnone
+
+
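+; The pslli tests shift by an immediate, so there is no count shadow to fold
+; in: the shadow is shifted by the same constant and ORed with
+; zeroinitializer.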
+define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_pslli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_pslli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_pslli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_pslli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_pslli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_pslli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone
+
+
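+; Arithmetic right shifts follow the same scheme; psra.q.512 takes its count
+; in a <2 x i64> and psra.d.512 in a <4 x i32>, both reduced to the same
+; i128 -> i64 check on the count's shadow.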
+define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psra_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psra_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psra_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone
+
+
+define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psra_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psra_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psra_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone
+
+
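+; psrai repeats the immediate-count pattern for arithmetic right shifts; the
+; q_512 masked forms use i8 masks over <8 x i64>, the d_512 forms i16 masks
+; over <16 x i32>.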
+define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrai_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrai_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrai_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) nounwind readnone
+
+
+define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrai_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrai_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrai_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone
+
+
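+; Logical right shifts (psrl) mirror the psll checks, including the variable
+; count handling and the mask/maskz select instrumentation.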
+define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrl_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrl_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrl_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrl_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
+; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrl_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
+; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrl_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
+; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
+; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone
+
+
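+; For shifts by an immediate (psrli.*), the count is a compile-time constant
+; and therefore always initialized, so the shadow of the vector operand is
+; simply shifted by the same immediate; the trailing
+; 'or ..., zeroinitializer' in the CHECK lines is the degenerate poison term
+; for the constant count.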
+define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ ret <16 x i32> %res
+}
+define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
+ ret <16 x i32> %res2
+}
+define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrli_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone
+
+
+define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ ret <8 x i64> %res
+}
+define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
+; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
+; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
+ ret <8 x i64> %res2
+}
+define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrli_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
+; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone
+
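+; For per-element variable shifts (psllv/psrav/psrlv), the first operand's
+; shadow is shifted by the concrete per-element counts, and every lane whose
+; count shadow is nonzero (icmp ne + sext) is fully poisoned in the result,
+; as the CHECK lines below verify.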
+define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psllv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_psllv_d_512_const() #0 {
+; CHECK-LABEL: @test_x86_avx512_psllv_d_512_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+ %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+ %res2 = add <16 x i32> %res0, %res1
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psllv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psllv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psllv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_psllv_q_512_const() #0 {
+; CHECK-LABEL: @test_x86_avx512_psllv_q_512_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+ %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+ %res2 = add <8 x i64> %res0, %res1
+ ret <8 x i64> %res2
+}
+
+define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psllv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
+ ret <8 x i64> %res2
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psllv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrav_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrav_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrav_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrav_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrav_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
+ ret <8 x i64> %res2
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrav_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone
+
+define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrlv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
+; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ ret <16 x i32> %res
+}
+
+define <16 x i32> @test_x86_avx512_psrlv_d_512_const() #0 {
+; CHECK-LABEL: @test_x86_avx512_psrlv_d_512_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
+ %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
+ %res2 = add <16 x i32> %res0, %res1
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrlv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
+ ret <16 x i32> %res2
+}
+
+define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_d_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
+; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x i32> [[RES2]]
+;
+ %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
+ %mask.cast = bitcast i16 %mask to <16 x i1>
+ %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
+ ret <16 x i32> %res2
+}
+
+declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
+
+define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
+; CHECK-LABEL: @test_x86_avx512_psrlv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
+; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ ret <8 x i64> %res
+}
+
+define <8 x i64> @test_x86_avx512_psrlv_q_512_const() #0 {
+; CHECK-LABEL: @test_x86_avx512_psrlv_q_512_const(
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
+; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
+; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
+ %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
+ %res2 = add <8 x i64> %res0, %res1
+ ret <8 x i64> %res2
+}
+
+define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_mask_psrlv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
+; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
+; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
+ ret <8 x i64> %res2
+}
+
+define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
+; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_q_512(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
+; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
+; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
+; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
+; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
+; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
+; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
+; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES2]]
+;
+ %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
+ %mask.cast = bitcast i8 %mask to <8 x i1>
+ %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
+ ret <8 x i64> %res2
+}
+
+declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
+
+
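+; The *_freeze cast tests below widen a vector using 'freeze poison' as the
+; second shufflevector operand; its shadow operand in the CHECK lines is
+; zeroinitializer, i.e. the frozen value is treated as fully initialized, so
+; only the shadow of %a0 is shuffled into the result.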
+define <8 x double> @test_mm256_castpd128_pd256_freeze(<2 x double> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm256_castpd128_pd256_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <2 x double> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %a1 = freeze <2 x double> poison
+ %res = shufflevector <2 x double> %a0, <2 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ ret <8 x double> %res
+}
+
+
+define <8 x double> @test_mm256_castpd256_pd256_freeze(<4 x double> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm256_castpd256_pd256_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <4 x double> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x double> [[RES]]
+;
+ %a1 = freeze <4 x double> poison
+ %res = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x double> %res
+}
+
+
+define <16 x float> @test_mm256_castps128_ps512_freeze(<4 x float> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm256_castps128_ps512_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <4 x float> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %a1 = freeze <4 x float> poison
+ %res = shufflevector <4 x float> %a0, <4 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
+ ret <16 x float> %res
+}
+
+
+define <16 x float> @test_mm256_castps256_ps512_freeze(<8 x float> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm256_castps256_ps512_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <8 x float> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[RES]]
+;
+ %a1 = freeze <8 x float> poison
+  %res = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x float> %res
+}
+
+
+define <8 x i64> @test_mm512_castsi128_si512_freeze(<2 x i64> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm512_castsi128_si512_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <2 x i64> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %a1 = freeze <2 x i64> poison
+ %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ ret <8 x i64> %res
+}
+
+
+define <8 x i64> @test_mm512_castsi256_si512_pd256_freeze(<4 x i64> %a0) nounwind #0 {
+; CHECK-LABEL: @test_mm512_castsi256_si512_pd256_freeze(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[A1:%.*]] = freeze <4 x i64> poison
+; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <8 x i64> [[RES]]
+;
+ %a1 = freeze <4 x i64> poison
+ %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ ret <8 x i64> %res
+}
+
+
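+; In the bad_mask_transition tests, the operands of mask.cmp.pd.512 are
+; checked eagerly (branch to __msan_warning_noreturn when any input shadow is
+; set), so the comparison result carries zero shadow and only the shadows of
+; %e/%f flow into the final select.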
+define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 {
+; CHECK-LABEL: @bad_mask_transition(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
+; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
+; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP10]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP12]], 0
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
+; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP13]], 0
+; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
+; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 15:
+; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[C:%.*]], <8 x double> [[D:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
+; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i1> [[TMP16]] to i8
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i16
+; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP17]] to i16
+; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[CONV2]] to <16 x i1>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP18]], <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x i1> [[TMP19]], <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP22]], <16 x i32> [[TMP4]], <16 x i32> [[TMP5]]
+; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP26:%.*]] = xor <16 x i32> [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP27:%.*]] = or <16 x i32> [[TMP26]], [[TMP4]]
+; CHECK-NEXT: [[TMP28:%.*]] = or <16 x i32> [[TMP27]], [[TMP5]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP28]], <16 x i32> [[TMP23]]
+; CHECK-NEXT: [[TMP29:%.*]] = select <16 x i1> [[TMP22]], <16 x float> [[F]], <16 x float> [[E]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP29]]
+;
+entry:
+ %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
+ %1 = bitcast <8 x i1> %0 to i8
+ %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
+ %3 = bitcast <8 x i1> %2 to i8
+ %conv = zext i8 %1 to i16
+ %conv2 = zext i8 %3 to i16
+ %4 = bitcast i16 %conv to <16 x i1>
+ %5 = bitcast i16 %conv2 to <16 x i1>
+ %6 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %7 = shufflevector <16 x i1> %5, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %8 = shufflevector <8 x i1> %6, <8 x i1> %7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %9 = select <16 x i1> %8, <16 x float> %f, <16 x float> %e
+ ret <16 x float> %9
+}
+
+define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 {
+; CHECK-LABEL: @bad_mask_transition_2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
+; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
+; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP8]] to i8
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP9]] to i16
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
+; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP2]]
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
+; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP16]], <16 x i32> [[TMP11]]
+; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[F]], <16 x float> [[E]]
+; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: ret <16 x float> [[TMP17]]
+;
+entry:
+ %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
+ %1 = bitcast <8 x i1> %0 to i8
+ %conv = zext i8 %1 to i16
+ %2 = bitcast i16 %conv to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e
+ ret <16 x float> %3
+}
+
+declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)
+declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>)
+declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
+declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
+declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>)
+declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>)
+declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
+declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
+
+attributes #0 = { sanitize_memory }
>From 76497dbe0f7f6bdb969f90a324717de3692ea597 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Sat, 25 Jan 2025 17:50:26 +0000
Subject: [PATCH 3/5] clang-format
---
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 6d7803db10ed01..bdd944aaa6360a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2997,7 +2997,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// fine).
///
/// Caller guarantees that this intrinsic does not access memory.
- bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I, unsigned int trailingFlags) {
+ bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
+ unsigned int trailingFlags) {
Type *RetTy = I.getType();
if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
return false;
>From 38423876b0490d208d790c2d36ee743b53325c0d Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 27 Jan 2025 00:33:31 +0000
Subject: [PATCH 4/5] Rebase test
---
.../MemorySanitizer/avx512-intrinsics.ll | 13602 ----------------
1 file changed, 13602 deletions(-)
delete mode 100644 llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
diff --git a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
deleted file mode 100644
index c24f9cf377cf62..00000000000000
--- a/llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
+++ /dev/null
@@ -1,13602 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt %s -S -mtriple=x86_64-linux-gnu -mattr=+avx512f -passes=msan 2>&1 | FileCheck %s
-;
-; Forked from llvm/test/Instrumentation/MemorySanitizer/avx512-intrinsics.ll
-
-define <8 x double> @test_mask_compress_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_compress_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10:[0-9]+]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP11]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
- ret <8 x double> %2
-}
-
-define <8 x double> @test_maskz_compress_pd_512(<8 x double> %data, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_compress_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP9]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
- ret <8 x double> %2
-}
-
-define <8 x double> @test_compress_pd_512(<8 x double> %data) #0 {
-; CHECK-LABEL: @test_compress_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true))
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP2]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <8 x double> %1
-}
-
-define <16 x float> @test_mask_compress_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_compress_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP11]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
- ret <16 x float> %2
-}
-
-define <16 x float> @test_maskz_compress_ps_512(<16 x float> %data, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_compress_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP9]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
- ret <16 x float> %2
-}
-
-define <16 x float> @test_compress_ps_512(<16 x float> %data) #0 {
-; CHECK-LABEL: @test_compress_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true))
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP2]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <16 x float> %1
-}
-
-define <8 x i64> @test_mask_compress_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_compress_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP11]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
- ret <8 x i64> %2
-}
-
-define <8 x i64> @test_maskz_compress_q_512(<8 x i64> %data, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_compress_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP9]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
- ret <8 x i64> %2
-}
-
-define <8 x i64> @test_compress_q_512(<8 x i64> %data) #0 {
-; CHECK-LABEL: @test_compress_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true))
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP2]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <8 x i64> %1
-}
-
-define <16 x i32> @test_mask_compress_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_compress_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP11]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
- ret <16 x i32> %2
-}
-
-define <16 x i32> @test_maskz_compress_d_512(<16 x i32> %data, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_compress_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP9]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
- ret <16 x i32> %2
-}
-
-define <16 x i32> @test_compress_d_512(<16 x i32> %data) #0 {
-; CHECK-LABEL: @test_compress_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true))
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP2]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <16 x i32> %1
-}
-
-define <8 x double> @test_expand_pd_512(<8 x double> %data) #0 {
-; CHECK-LABEL: @test_expand_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> undef, <8 x i1> splat (i1 true))
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP2]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <8 x double> %1
-}
-
-define <8 x double> @test_mask_expand_pd_512(<8 x double> %data, <8 x double> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_expand_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP11]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> %passthru, <8 x i1> %1)
- ret <8 x double> %2
-}
-
-define <8 x double> @test_maskz_expand_pd_512(<8 x double> %data, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_expand_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> [[DATA:%.*]], <8 x double> zeroinitializer, <8 x i1> [[TMP4]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP9]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double> %data, <8 x double> zeroinitializer, <8 x i1> %1)
- ret <8 x double> %2
-}
-
-define <16 x float> @test_expand_ps_512(<16 x float> %data) #0 {
-; CHECK-LABEL: @test_expand_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> undef, <16 x i1> splat (i1 true))
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP2]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mask_expand_ps_512(<16 x float> %data, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_expand_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP11]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> %passthru, <16 x i1> %1)
- ret <16 x float> %2
-}
-
-define <16 x float> @test_maskz_expand_ps_512(<16 x float> %data, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_expand_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> [[DATA:%.*]], <16 x float> zeroinitializer, <16 x i1> [[TMP4]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP9]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float> %data, <16 x float> zeroinitializer, <16 x i1> %1)
- ret <16 x float> %2
-}
-
-define <8 x i64> @test_expand_q_512(<8 x i64> %data) #0 {
-; CHECK-LABEL: @test_expand_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> undef, <8 x i1> splat (i1 true))
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP2]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> undef, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <8 x i64> %1
-}
-
-define <8 x i64> @test_mask_expand_q_512(<8 x i64> %data, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_expand_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i1> [[TMP4]] to i8
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> [[PASSTHRU:%.*]], <8 x i1> [[TMP5]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP11]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> %passthru, <8 x i1> %1)
- ret <8 x i64> %2
-}
-
-define <8 x i64> @test_maskz_expand_q_512(<8 x i64> %data, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_expand_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8 [[TMP1]] to <8 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i1> [[TMP3]] to i8
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> [[DATA:%.*]], <8 x i64> zeroinitializer, <8 x i1> [[TMP4]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP9]]
-;
- %1 = bitcast i8 %mask to <8 x i1>
- %2 = call <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64> %data, <8 x i64> zeroinitializer, <8 x i1> %1)
- ret <8 x i64> %2
-}
-
-define <16 x i32> @test_expand_d_512(<16 x i32> %data) #0 {
-; CHECK-LABEL: @test_expand_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> undef, <16 x i1> splat (i1 true))
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP2]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> undef, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
- ret <16 x i32> %1
-}
-
-define <16 x i32> @test_mask_expand_d_512(<16 x i32> %data, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_expand_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP8]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> [[PASSTHRU:%.*]], <16 x i1> [[TMP5]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP11]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> %passthru, <16 x i1> %1)
- ret <16 x i32> %2
-}
-
-define <16 x i32> @test_maskz_expand_d_512(<16 x i32> %data, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_expand_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16 [[TMP1]] to <16 x i1>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i1> [[TMP3]] to i16
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i16 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> [[DATA:%.*]], <16 x i32> zeroinitializer, <16 x i1> [[TMP4]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP9]]
-;
- %1 = bitcast i16 %mask to <16 x i1>
- %2 = call <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32> %data, <16 x i32> zeroinitializer, <16 x i1> %1)
- ret <16 x i32> %2
-}
-
-define <16 x float> @test_rcp_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_rcp_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rcp14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
-
-define <8 x double> @test_rcp_pd_512(<8 x double> %a0) #0 {
-; CHECK-LABEL: @test_rcp_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %res = call <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1) ; <<8 x double>> [#uses=1]
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.rcp14.pd.512(<8 x double>, <8 x double>, i8) nounwind readnone
-
-declare <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32, i32)
-
-define <2 x double> @test_rndscale_sd(<2 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @test_rndscale_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 11, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> undef, i8 -1, i32 11, i32 4)
- ret <2 x double>%res
-}
-
-define <2 x double> @test_rndscale_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) #0 {
-; CHECK-LABEL: @test_rndscale_sd_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
- ret <2 x double>%res
-}
-
-define <2 x double> @test_rndscale_sd_mask_load(<2 x double> %a, ptr %bptr, <2 x double> %c, i8 %mask) #0 {
-; CHECK-LABEL: @test_rndscale_sd_mask_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[B:%.*]] = load <2 x double>, ptr [[BPTR:%.*]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[BPTR]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <2 x i64>, ptr [[TMP9]], align 16
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %b = load <2 x double>, ptr %bptr
- %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask, i32 11, i32 4)
- ret <2 x double>%res
-}
-
-define <2 x double> @test_rndscale_sd_maskz(<2 x double> %a, <2 x double> %b, i8 %mask) #0 {
-; CHECK-LABEL: @test_rndscale_sd_maskz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.rndscale.sd(<2 x double> %a, <2 x double> %b, <2 x double> zeroinitializer, i8 %mask, i32 11, i32 4)
- ret <2 x double>%res
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32, i32)
-
-define <4 x float> @test_rndscale_ss(<4 x float> %a, <4 x float> %b) #0 {
-; CHECK-LABEL: @test_rndscale_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 11, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
- ret <4 x float>%res
-}
-
-define <4 x float> @test_rndscale_ss_load(<4 x float> %a, ptr %bptr) #0 {
-; CHECK-LABEL: @test_rndscale_ss_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[B:%.*]] = load <4 x float>, ptr [[BPTR:%.*]], align 16
-; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[BPTR]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B]], <4 x float> undef, i8 -1, i32 11, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %b = load <4 x float>, ptr %bptr
- %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> undef, i8 -1, i32 11, i32 4)
- ret <4 x float>%res
-}
-
-define <4 x float> @test_rndscale_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) #0 {
-; CHECK-LABEL: @test_rndscale_ss_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 11, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask, i32 11, i32 4)
- ret <4 x float>%res
-}
-
-define <4 x float> @test_rndscale_ss_maskz(<4 x float> %a, <4 x float> %b, i8 %mask) #0 {
-; CHECK-LABEL: @test_rndscale_ss_maskz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 11, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.rndscale.ss(<4 x float> %a, <4 x float> %b, <4 x float> zeroinitializer, i8 %mask, i32 11, i32 4)
- ret <4 x float>%res
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
-
-define <8 x double> @test7(<8 x double> %a) #0 {
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> [[A:%.*]], i32 11, <8 x double> [[A]], i8 -1, i32 4)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.rndscale.pd.512(<8 x double> %a, i32 11, <8 x double> %a, i8 -1, i32 4)
- ret <8 x double>%res
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
-
-define <16 x float> @test8(<16 x float> %a) #0 {
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> [[A:%.*]], i32 11, <16 x float> [[A]], i16 -1, i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.rndscale.ps.512(<16 x float> %a, i32 11, <16 x float> %a, i16 -1, i32 4)
- ret <16 x float>%res
-}
-
-define <16 x float> @test_rsqrt_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_rsqrt_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1) ; <<16 x float>> [#uses=1]
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.rsqrt14.ps.512(<16 x float>, <16 x float>, i16) nounwind readnone
-
-define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) #0 {
-; CHECK-LABEL: @test_sqrt_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
-; CHECK-NEXT: store <8 x i64> [[TMP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP2]]
-;
- %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
- ret <8 x double> %1
-}
-
-define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_sqrt_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP1]], <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP4]] to <8 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP1]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP12]], <8 x i64> [[TMP7]]
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP6]], <8 x double> [[TMP4]], <8 x double> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP13]]
-;
- %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
- ret <8 x double> %3
-}
-
-define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_sqrt_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[A0:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP1]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i64> [[TMP10]], <8 x i64> [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP5]], <8 x double> [[TMP3]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP11]]
-;
- %1 = call <8 x double> @llvm.sqrt.v8f64(<8 x double> %a0)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-declare <8 x double> @llvm.sqrt.v8f64(<8 x double>)
-
-define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) #0 {
-; CHECK-LABEL: @test_sqrt_round_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP5]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
- ret <8 x double> %1
-}
-
-define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_sqrt_round_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> zeroinitializer, <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[PASSTHRU:%.*]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP9]], <8 x double> [[TMP7]], <8 x double> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
- ret <8 x double> %3
-}
-
-define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_sqrt_round_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> [[A0:%.*]], i32 11)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[TMP6]] to <8 x i64>
-; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP8]], <8 x double> [[TMP6]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP14]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double> %a0, i32 11)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-declare <8 x double> @llvm.x86.avx512.sqrt.pd.512(<8 x double>, i32) nounwind readnone
-
-define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_sqrt_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
-; CHECK-NEXT: store <16 x i32> [[TMP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP2]]
-;
- %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_sqrt_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP4]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP12]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP6]], <16 x float> [[TMP4]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP13]]
-;
- %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_sqrt_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[A0:%.*]])
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP3]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP7]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP1]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP10]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[TMP3]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP11]]
-;
- %1 = call <16 x float> @llvm.sqrt.v16f32(<16 x float> %a0)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-declare <16 x float> @llvm.sqrt.v16f32(<16 x float>)
-
-define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_sqrt_round_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP5]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_mask_sqrt_round_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP9]], <16 x float> [[TMP7]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) #0 {
-; CHECK-LABEL: @test_maskz_sqrt_round_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> [[A0:%.*]], i32 11)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[TMP6]] to <16 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
-; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP8]], <16 x float> [[TMP6]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP14]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float> %a0, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-declare <16 x float> @llvm.x86.avx512.sqrt.ps.512(<16 x float>, i32) nounwind readnone
-
-define <8 x double> @test_getexp_pd_512(<8 x double> %a0) #0 {
-; CHECK-LABEL: @test_getexp_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4)
- ret <8 x double> %res
-}
-define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) #0 {
-; CHECK-LABEL: @test_getexp_round_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> [[A0:%.*]], <8 x double> zeroinitializer, i8 -1, i32 12)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 12)
- ret <8 x double> %res
-}
-declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
-
-define <16 x float> @test_getexp_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_getexp_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4)
- ret <16 x float> %res
-}
-
-define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) #0 {
-; CHECK-LABEL: @test_getexp_round_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> [[A0:%.*]], <16 x float> zeroinitializer, i16 -1, i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8)
- ret <16 x float> %res
-}
-declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
-
-declare <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_sqrt_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_sqrt_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
-; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 9)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
-; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
-; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 [[MASK]], i32 10)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
-; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
-; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
-; CHECK: 21:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 22:
-; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> zeroinitializer, i8 -1, i32 11)
-; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES_2:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
-; CHECK-NEXT: [[RES:%.*]] = fadd <4 x float> [[RES_1]], [[RES_2]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res0 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 10)
- %res3 = call <4 x float> @llvm.x86.avx512.mask.sqrt.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 11)
-
- %res.1 = fadd <4 x float> %res0, %res1
- %res.2 = fadd <4 x float> %res2, %res3
- %res = fadd <4 x float> %res.1, %res.2
- ret <4 x float> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_sqrt_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_sqrt_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
-; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 9)
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSCMP13]], [[_MSCMP14]]
-; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
-; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 [[MASK]], i32 10)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
-; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
-; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP21:%.*]], label [[TMP22:%.*]], !prof [[PROF1]]
-; CHECK: 21:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 22:
-; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> zeroinitializer, i8 -1, i32 11)
-; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES_2:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
-; CHECK-NEXT: [[RES:%.*]] = fadd <2 x double> [[RES_1]], [[RES_2]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res0 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
- %res2 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 10)
- %res3 = call <2 x double> @llvm.x86.avx512.mask.sqrt.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 11)
-
- %res.1 = fadd <2 x double> %res0, %res1
- %res.2 = fadd <2 x double> %res2, %res3
- %res = fadd <2 x double> %res.1, %res.2
- ret <2 x double> %res
-}
-
-define i32 @test_x86_avx512_cvttsd2usi(<2 x double> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvttsd2usi(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[A0]], i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES2]]
-;
- %res0 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %a0, i32 8) ;
- %res2 = add i32 %res0, %res1
- ret i32 %res2
-}
-declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvttsd2si(<2 x double> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvttsd2si(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[A0]], i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES2]]
-;
- %res0 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 4) ;
- %res1 = call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %a0, i32 8) ;
- %res2 = add i32 %res0, %res1
- ret i32 %res2
-}
-declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvttss2si(<4 x float> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvttss2si(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0:%.*]], i32 8)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A0]], i32 4)
-; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES2]]
-;
- %res0 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a0, i32 4) ;
- %res2 = add i32 %res0, %res1
- ret i32 %res2
-}
-declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvttss2si_load(ptr %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvttss2si_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP2:%.*]], label [[TMP3:%.*]], !prof [[PROF1]]
-; CHECK: 2:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 3:
-; CHECK-NEXT: [[A1:%.*]] = load <4 x float>, ptr [[A0:%.*]], align 16
-; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr [[A0]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = xor i64 [[TMP4]], 87960930222080
-; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP6]], align 16
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[A1]], i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %a1 = load <4 x float>, ptr %a0
- %res = call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %a1, i32 4) ;
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_cvttss2usi(<4 x float> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvttss2usi(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES0:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0:%.*]], i32 8)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[A0]], i32 4)
-; CHECK-NEXT: [[RES2:%.*]] = add i32 [[RES0]], [[RES1]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES2]]
-;
- %res0 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 8) ;
- %res1 = call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %a0, i32 4) ;
- %res2 = add i32 %res0, %res1
- ret i32 %res2
-}
-declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvtsd2usi32(<2 x double> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvtsd2usi32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[A0]], i32 9)
-; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES4]]
-;
- %res = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 11)
- %res2 = call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %a0, i32 9)
- %res3 = add i32 %res, %res1
- %res4 = add i32 %res3, %res2
- ret i32 %res4
-}
-declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvtsd2si32(<2 x double> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvtsd2si32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[A0]], i32 9)
-; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES4]]
-;
- %res = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 11)
- %res2 = call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %a0, i32 9)
- %res3 = add i32 %res, %res1
- %res4 = add i32 %res3, %res2
- ret i32 %res4
-}
-declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvtss2usi32(<4 x float> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvtss2usi32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i32 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[A0]], i32 9)
-; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES4]]
-;
- %res = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 11)
- %res2 = call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %a0, i32 9)
- %res3 = add i32 %res, %res1
- %res4 = add i32 %res3, %res2
- ret i32 %res4
-}
-declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32) nounwind readnone
-
-define i32 @test_x86_avx512_cvtss2si32(<4 x float> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvtss2si32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0:%.*]], i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 11)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[A0]], i32 9)
-; CHECK-NEXT: [[RES3:%.*]] = add i32 [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add i32 [[RES3]], [[RES2]]
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES4]]
-;
- %res = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 4)
- %res1 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 11)
- %res2 = call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %a0, i32 9)
- %res3 = add i32 %res, %res1
- %res4 = add i32 %res3, %res2
- ret i32 %res4
-}
-declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32) nounwind readnone
-
-define <16 x i16> @test_x86_vcvtps2ph_256(<16 x float> %a0, <16 x i16> %src, i16 %mask, ptr %dst) #0 {
-; CHECK-LABEL: @test_x86_vcvtps2ph_256(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 104) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0:%.*]], i32 2, <16 x i16> zeroinitializer, i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 11, <16 x i16> zeroinitializer, i16 [[MASK:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i16> [[TMP3]] to i256
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSCMP3]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i16 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES3:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> [[A0]], i32 12, <16 x i16> [[SRC:%.*]], i16 [[MASK]])
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP8]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
-; CHECK: 15:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 16:
-; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr [[DST:%.*]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP17]], 87960930222080
-; CHECK-NEXT: [[TMP19:%.*]] = inttoptr i64 [[TMP18]] to ptr
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr [[TMP19]], align 32
-; CHECK-NEXT: store <16 x i16> [[RES1]], ptr [[DST]], align 32
-; CHECK-NEXT: [[RES:%.*]] = add <16 x i16> [[RES2]], [[RES3]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i16> [[RES]]
-;
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 2, <16 x i16> zeroinitializer, i16 -1)
- %res2 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 11, <16 x i16> zeroinitializer, i16 %mask)
- %res3 = call <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float> %a0, i32 12, <16 x i16> %src, i16 %mask)
- store <16 x i16> %res1, ptr %dst
- %res = add <16 x i16> %res2, %res3
- ret <16 x i16> %res
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.vcvtps2ph.512(<16 x float>, i32, <16 x i16>, i16) nounwind readonly
-
-define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) #0 {
-; CHECK-LABEL: @test_cmpps(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 2, <16 x i1> splat (i1 true), i32 8)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i1> [[RES]] to i16
-; CHECK-NEXT: store i16 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i16 [[TMP7]]
-;
- %res = call <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 8)
- %1 = bitcast <16 x i1> %res to i16
- ret i16 %1
-}
-declare <16 x i1> @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, <16 x i1>, i32)
-
-define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) #0 {
-; CHECK-LABEL: @test_cmppd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 4, <8 x i1> splat (i1 true), i32 4)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i1> [[RES]] to i8
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i8 [[TMP7]]
-;
- %res = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
- %1 = bitcast <8 x i1> %res to i8
- ret i8 %1
-}
-declare <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, <8 x i1>, i32)
-
-
- ; fp min - max
-define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) #0 {
-; CHECK-LABEL: @test_vmaxpd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP7]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
- ret <8 x double> %1
-}
-declare <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double>, <8 x double>, i32)
-
-define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) #0 {
-; CHECK-LABEL: @test_vminpd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP7]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
- ret <8 x double> %1
-}
-declare <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double>, <8 x double>, i32)
-
-define void @test_mask_store_ss(ptr %ptr, <4 x float> %data, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_store_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP1]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i8 [[TMP1]], 1
-; CHECK-NEXT: [[TMP7:%.*]] = or i8 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP8:%.*]] = or i8 [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[TMP9:%.*]] = and i8 [[MASK]], 1
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP9]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i1> [[TMP10]], <8 x i1> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP11]], <8 x i1> [[TMP11]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[PTR:%.*]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
-; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
-; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP2]], ptr [[TMP14]], i32 1, <4 x i1> [[EXTRACT]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP3]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i1> [[_MSPROP]] to i4
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i4 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]]
-; CHECK: 16:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 17:
-; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0(<4 x float> [[DATA:%.*]], ptr [[PTR]], i32 1, <4 x i1> [[EXTRACT]])
-; CHECK-NEXT: ret void
-;
- %1 = and i8 %mask, 1
- %2 = bitcast i8 %1 to <8 x i1>
- %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
- call void @llvm.masked.store.v4f32.p0(<4 x float> %data, ptr %ptr, i32 1, <4 x i1> %extract)
- ret void
-}
-declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) #1
-
-
-declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
-declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
-declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
-
-define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vsubps_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vsubps_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vsubps_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vsubps_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vmulps_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vmulps_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vmulps_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) #0 {
-; CHECK-LABEL: @test_vmulps_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_passthru_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_passthru_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_passthru_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_vmulps_mask_passthru_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[PASSTHRU:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
- ret <16 x float> %3
-}
-
-define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_vmulpd_mask_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 8)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_vmulpd_mask_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 9)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_vmulpd_mask_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 10)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_vmulpd_mask_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a0, <8 x double> %a1, i32 11)
- %2 = bitcast i8 %mask to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_add_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_add_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_add_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_add_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_add_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_add_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_add_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_add_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_add_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_add_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_add_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_add_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_add_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- ret <16 x float> %1
-}
-declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
-
-define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_sub_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_sub_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_sub_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_sub_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_sub_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_sub_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_sub_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_sub_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_div_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_div_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_div_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_div_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_div_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_div_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_div_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_div_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_div_round_ps_rn_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_div_round_ps_rd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 9)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 9)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_div_round_ps_ru_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 10)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 10)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_div_round_ps_rz_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 11)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 11)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_div_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- ret <16 x float> %1
-}
-declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
-
-define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_min_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_min_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_min_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_min_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_min_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_min_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- ret <16 x float> %1
-}
-declare <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float>, <16 x float>, i32)
-
-define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_max_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_maskz_max_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_max_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_mask_max_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- %2 = bitcast i16 %mask to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %src
- ret <16 x float> %3
-}
-
-define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_max_round_ps_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
- ret <16 x float> %1
-}
-
-define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_mm512_max_round_ps_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
- ret <16 x float> %1
-}
-declare <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32)
-
-declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_mask_add_ss_rn(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_add_ss_rd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 9)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_add_ss_ru(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 10)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_add_ss_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 11)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_add_ss_current(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_add_ss_rn(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_add_ss_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_add_ss_rn(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_add_ss_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_add_ss_current_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_ss_current_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %a1.val = load float, ptr %a1
- %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
- %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
- %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
- %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_add_ss_current_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_add_ss_current_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %a1.val = load float, ptr %a1
- %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
- %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
- %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
- %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
- %res = call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_mask_add_sd_rn(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_add_sd_rd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_rd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 9)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 9)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_add_sd_ru(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_ru(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 10)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 10)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_add_sd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_rz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 11)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 11)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_add_sd_current(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_current(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_add_sd_rn(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_add_sd_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_add_sd_rn(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_add_sd_rn(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_add_sd_current_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_add_sd_current_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %a1.val = load double, ptr %a1
- %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
- %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_add_sd_current_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_add_sd_current_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %a1.val = load double, ptr %a1
- %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
- %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
- %res = call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_mask_max_ss_sae(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_ss_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_max_ss_sae(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_ss_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_max_ss_sae(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_max_ss_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_max_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_max_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_max_ss(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_max_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_max_ss_memfold(<4 x float> %a0, ptr %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_ss_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %a1.val = load float, ptr %a1
- %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
- %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
- %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
- %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> %a2, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_maskz_max_ss_memfold(<4 x float> %a0, ptr %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_ss_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load float, ptr [[A1:%.*]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <4 x float> undef, float [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[A1V1:%.*]] = insertelement <4 x float> [[A1V0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[A1V2:%.*]] = insertelement <4 x float> [[A1V1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <4 x float> [[A1V2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[_MSPROP3]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A0:%.*]], <4 x float> [[A1V]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %a1.val = load float, ptr %a1
- %a1v0 = insertelement <4 x float> undef, float %a1.val, i32 0
- %a1v1 = insertelement <4 x float> %a1v0, float 0.000000e+00, i32 1
- %a1v2 = insertelement <4 x float> %a1v1, float 0.000000e+00, i32 2
- %a1v = insertelement <4 x float> %a1v2, float 0.000000e+00, i32 3
- %res = call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>%a0, <4 x float> %a1v, <4 x float> zeroinitializer, i8 %mask, i32 4)
- ret <4 x float> %res
-}
-declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_mask_max_sd_sae(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_sd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_max_sd_sae(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_sd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_max_sd_sae(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_max_sd_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 8)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_max_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_max_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_max_sd(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_max_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_max_sd_memfold(<2 x double> %a0, ptr %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_max_sd_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSOR5]], [[_MSCMP6]]
-; CHECK-NEXT: br i1 [[_MSOR7]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %a1.val = load double, ptr %a1
- %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
- %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> %a2, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_maskz_max_sd_memfold(<2 x double> %a0, ptr %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_max_sd_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A1_VAL:%.*]] = load double, ptr [[A1:%.*]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A1]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[A1V0:%.*]] = insertelement <2 x double> undef, double [[A1_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[A1V:%.*]] = insertelement <2 x double> [[A1V0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[_MSPROP1]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
-; CHECK: 11:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 12:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A0:%.*]], <2 x double> [[A1V]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %a1.val = load double, ptr %a1
- %a1v0 = insertelement <2 x double> undef, double %a1.val, i32 0
- %a1v = insertelement <2 x double> %a1v0, double 0.000000e+00, i32 1
- %res = call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>%a0, <2 x double> %a1v, <2 x double> zeroinitializer, i8 %mask, i32 4)
- ret <2 x double> %res
-}
-
-define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) #0 {
-; CHECK-LABEL: @test_x86_avx512_cvtsi2ss32(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 11)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 11) ; <<<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float>, i32, i32) nounwind readnone
-
-define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) #0 {
-; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 9)
-; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
-; CHECK-LABEL: @test_x86_avx512__mm_cvt_roundu32_ss_mem(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 9)
-; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %b = load i32, ptr %ptr
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 9) ; <<<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) #0 {
-; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B:%.*]], i32 4)
-; CHECK-NEXT: store <4 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-
-define <4 x float> @test_x86_avx512__mm_cvtu32_ss_mem(<4 x float> %a, ptr %ptr) #0 {
-; CHECK-LABEL: @test_x86_avx512__mm_cvtu32_ss_mem(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[B:%.*]] = load i32, ptr [[PTR:%.*]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[PTR]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP7]], align 4
-; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i32 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[_MSLD]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> [[A:%.*]], i32 [[B]], i32 4)
-; CHECK-NEXT: store <4 x i32> [[TMP8]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %b = load i32, ptr %ptr
- %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1]
- ret <4 x float> %res
-}
-declare <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float>, i32, i32) nounwind readnone
-
-declare <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>)
-
-define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2P]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP9]]
-;
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
- ret <16 x i32> %1
-}
-
-define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X1]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X1]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP17]]
-;
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
- ret <16 x i32> %3
-}
-
-declare <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>)
-
-define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP9]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
- ret <8 x double> %1
-}
-
-define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x double> [[X2:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP14:%.*]] = select <8 x i1> [[TMP13]], <8 x i64> zeroinitializer, <8 x i64> [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x double> [[TMP10]] to <8 x i64>
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
-; CHECK-NEXT: [[TMP17:%.*]] = xor <8 x i64> [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or <8 x i64> [[TMP17]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = or <8 x i64> [[TMP18]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP19]], <8 x i64> [[TMP14]]
-; CHECK-NEXT: [[TMP20:%.*]] = select <8 x i1> [[TMP13]], <8 x double> [[TMP10]], <8 x double> [[TMP11]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP20]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2)
- %2 = bitcast <8 x i64> %x1 to <8 x double>
- %3 = bitcast i8 %x3 to <8 x i1>
- %4 = select <8 x i1> %3, <8 x double> %1, <8 x double> %2
- ret <8 x double> %4
-}
-
-declare <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>)
-
-define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP9]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
- ret <16 x float> %1
-}
-
-define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x float> [[X2:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP14:%.*]] = select <16 x i1> [[TMP13]], <16 x i32> zeroinitializer, <16 x i32> [[TMP2]]
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
-; CHECK-NEXT: [[TMP17:%.*]] = xor <16 x i32> [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or <16 x i32> [[TMP17]], zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = or <16 x i32> [[TMP18]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP19]], <16 x i32> [[TMP14]]
-; CHECK-NEXT: [[TMP20:%.*]] = select <16 x i1> [[TMP13]], <16 x float> [[TMP10]], <16 x float> [[TMP11]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP20]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2)
- %2 = bitcast <16 x i32> %x1 to <16 x float>
- %3 = bitcast i16 %x3 to <16 x i1>
- %4 = select <16 x i1> %3, <16 x float> %1, <16 x float> %2
- ret <16 x float> %4
-}
-
-declare <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>)
-
-define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
-; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP4]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
- ret <8 x i64> %1
-}
-
-define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> [[TMP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X1]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X1]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP12]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x1
- ret <8 x i64> %3
-}
-
-define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, ptr %x2p, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[_MSLD]]
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP17]]
-;
- %x2 = load <16 x i32>, ptr %x2p
- %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
- ret <16 x i32> %3
-}
-
-define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, ptr %x2ptr, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP9]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <8 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> undef, double [[X2S]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> splat (i64 -1), <8 x i32> zeroinitializer
-; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x double> [[X2]])
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP17]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x double> [[TMP15]] to <8 x i64>
-; CHECK-NEXT: [[TMP20:%.*]] = xor <8 x i64> [[TMP19]], zeroinitializer
-; CHECK-NEXT: [[TMP21:%.*]] = or <8 x i64> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = or <8 x i64> [[TMP21]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP16]], <8 x i64> [[TMP22]], <8 x i64> [[TMP18]]
-; CHECK-NEXT: [[TMP23:%.*]] = select <8 x i1> [[TMP17]], <8 x double> [[TMP15]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP23]]
-;
- %x2s = load double, ptr %x2ptr
- %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
- %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
- %1 = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> %x1, <8 x i64> %x0, <8 x double> %x2)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x float> [[X2:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[TMP10]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP17]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP12]], <16 x float> [[TMP10]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> %x1, <16 x i32> %x0, <16 x float> %x2)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0:%.*]], <8 x i64> [[X2:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP1]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP12]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> %x1, <8 x i64> %x0, <8 x i64> %x2)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
- ret <8 x i64> %3
-}
-
-define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
-; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP4]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
- ret <16 x i32> %1
-}
-
-define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], [[TMP3]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0:%.*]], <16 x i32> [[X2:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP1]]
-; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X1]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP1]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X1]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP12]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> %x1, <16 x i32> %x0, <16 x i32> %x2)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x1
- ret <16 x i32> %3
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x double> [[X2]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES2]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 11)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 8)
- %res2 = fadd <8 x double> %res, %res1
- ret <8 x double> %res2
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 10)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x float> [[X2]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 10)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 8)
- %res2 = fadd <16 x float> %res, %res1
- ret <16 x float> %res2
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmov_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qb_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmov.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64>, <16 x i8>, i8)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmovs_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovs_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qb_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovs.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64>, <16 x i8>, i8)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmovus_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0:%.*]], <16 x i8> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> [[X0]], <16 x i8> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.qb.512(<8 x i64> %x0, <16 x i8> zeroinitializer, i8 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovus_qb_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qb_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovus.qb.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
-
-define <8 x i16>@test_int_x86_avx512_mask_pmov_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i16> [[RES4]]
-;
- %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
- %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
- %res3 = add <8 x i16> %res0, %res1
- %res4 = add <8 x i16> %res3, %res2
- ret <8 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmov_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmov.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64>, <8 x i16>, i8)
-
-define <8 x i16>@test_int_x86_avx512_mask_pmovs_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i16> [[RES4]]
-;
- %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
- %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovs.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
- %res3 = add <8 x i16> %res0, %res1
- %res4 = add <8 x i16> %res3, %res2
- ret <8 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovs_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovs.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64>, <8 x i16>, i8)
-
-define <8 x i16>@test_int_x86_avx512_mask_pmovus_qw_512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0:%.*]], <8 x i16> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> [[X0]], <8 x i16> zeroinitializer, i8 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <8 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <8 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <8 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i16> [[RES4]]
-;
- %res0 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 -1)
- %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> %x1, i8 %x2)
- %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovus.qw.512(<8 x i64> %x0, <8 x i16> zeroinitializer, i8 %x2)
- %res3 = add <8 x i16> %res0, %res1
- %res4 = add <8 x i16> %res3, %res2
- ret <8 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovus_qw_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovus.qw.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-define <8 x i32>@test_int_x86_avx512_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_pmov_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
-; CHECK-NEXT: store <8 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[TMP2]]
-;
- %1 = trunc <8 x i64> %x0 to <8 x i32>
- ret <8 x i32> %1
-}
-
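By contrast, the plain truncation above needs no checks at all: the shadow is truncated alongside the value (the _MSPROP line) and becomes the result shadow. A minimal standalone equivalent, hand-written for illustration:

define <8 x i32> @trunc_shadow_sketch(<8 x i64> %x, <8 x i64> %sx) {
  %sr = trunc <8 x i64> %sx to <8 x i32>  ; shadow truncates with the value
  %r  = trunc <8 x i64> %x to <8 x i32>
  ; %sr would be stored to __msan_retval_tls as the shadow of %r
  ret <8 x i32> %r
}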
-define <8 x i32>@test_int_x86_avx512_mask_pmov_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[_MSPROP]], <8 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i32> [[TMP4]], [[X1:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i32> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP10]], <8 x i32> [[TMP7]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP4]], <8 x i32> [[X1]]
-; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[TMP11]]
-;
- %1 = trunc <8 x i64> %x0 to <8 x i32>
- %2 = bitcast i8 %x2 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %x1
- ret <8 x i32> %3
-}
-
-define <8 x i32>@test_int_x86_avx512_maskz_pmov_qd_512(<8 x i64> %x0, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_pmov_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = trunc <8 x i64> [[TMP1]] to <8 x i32>
-; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i32>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X2:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[_MSPROP]], <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP7]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP4]], <8 x i32> [[TMP9]], <8 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP3]], <8 x i32> zeroinitializer
-; CHECK-NEXT: store <8 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[TMP10]]
-;
- %1 = trunc <8 x i64> %x0 to <8 x i32>
- %2 = bitcast i8 %x2 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> zeroinitializer
- ret <8 x i32> %3
-}
-
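The two masked variants above exercise MSan's precise select rule rather than a check: where the mask bit's shadow is clean, the result shadow is the chosen arm's shadow; where the mask bit itself is poisoned, the result is poisoned unless both arms are equal and both clean. That is what the TMP7..TMP10/_MSPROP_SELECT lines compute. As a standalone, hand-written sketch with illustrative names (not pass output):

define <8 x i32> @select_shadow_sketch(<8 x i1> %m, <8 x i1> %sm,
                                       <8 x i32> %a, <8 x i32> %sa,
                                       <8 x i32> %b, <8 x i32> %sb) {
  %pick = select <8 x i1> %m, <8 x i32> %sa, <8 x i32> %sb  ; mask known: shadow of chosen arm
  %diff = xor <8 x i32> %a, %b                              ; mask unknown: poisoned unless the
  %or1  = or <8 x i32> %diff, %sa                           ; arms are equal and both clean
  %or2  = or <8 x i32> %or1, %sb
  %res  = select <8 x i1> %sm, <8 x i32> %or2, <8 x i32> %pick
  ret <8 x i32> %res
}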
-declare void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmov_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_qd_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmov.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64>, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_pmovs_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
- ret <8 x i32> %res
-}
-
-define <8 x i32>@test_int_x86_avx512_mask_pmovs_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
- ret <8 x i32> %res
-}
-
-define <8 x i32>@test_int_x86_avx512_maskz_pmovs_qd_512(<8 x i64> %x0, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovs_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovs.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
- ret <8 x i32> %res
-}
-
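Unlike pmov, the saturating forms (pmovs/pmovus) cannot be rewritten as a plain trunc, so they remain intrinsic calls and take the strict path sketched earlier even when the mask is the constant -1 or the passthrough is zeroinitializer, as the checks above show.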
-declare void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovs_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_qd_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovs.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64>, <8 x i32>, i8)
-
-define <8 x i32>@test_int_x86_avx512_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_pmovus_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 -1)
- ret <8 x i32> %res
-}
-
-define <8 x i32>@test_int_x86_avx512_mask_pmovus_qd_512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> %x1, i8 %x2)
- ret <8 x i32> %res
-}
-
-define <8 x i32>@test_int_x86_avx512_maskz_pmovus_qd_512(<8 x i64> %x0, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_pmovus_qd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP2]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> [[X0:%.*]], <8 x i32> zeroinitializer, i8 [[X2:%.*]])
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.pmovus.qd.512(<8 x i64> %x0, <8 x i32> zeroinitializer, i8 %x2)
- ret <8 x i32> %res
-}
-
-declare void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64>, i8)
-
-define void @test_int_x86_avx512_mask_pmovus_qd_mem_512(ptr %ptr, <8 x i64> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_qd_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR:%.*]], <8 x i64> [[X1:%.*]], i8 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr [[PTR]], <8 x i64> [[X1]], i8 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 -1)
- call void @llvm.x86.avx512.mask.pmovus.qd.mem.512(ptr %ptr, <8 x i64> %x1, i8 %x2)
- ret void
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmov_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmov_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_db_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmov.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32>, <16 x i8>, i16)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmovs_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmovs_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_db_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmovs.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32>, <16 x i8>, i16)
-
-define <16 x i8>@test_int_x86_avx512_mask_pmovus_db_512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0:%.*]], <16 x i8> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> [[X0]], <16 x i8> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i8> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i8> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i8> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i8> [[RES4]]
-;
- %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 -1)
- %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> %x1, i16 %x2)
- %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.db.512(<16 x i32> %x0, <16 x i8> zeroinitializer, i16 %x2)
- %res3 = add <16 x i8> %res0, %res1
- %res4 = add <16 x i8> %res3, %res2
- ret <16 x i8> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmovus_db_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_db_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmovus.db.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32>, <16 x i16>, i16)
-
-define <16 x i16>@test_int_x86_avx512_mask_pmov_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i16> [[RES4]]
-;
- %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
- %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmov.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
- %res3 = add <16 x i16> %res0, %res1
- %res4 = add <16 x i16> %res3, %res2
- ret <16 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmov_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmov_dw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmov.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32>, <16 x i16>, i16)
-
-define <16 x i16>@test_int_x86_avx512_mask_pmovs_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i16> [[RES4]]
-;
- %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
- %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovs.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
- %res3 = add <16 x i16> %res0, %res1
- %res4 = add <16 x i16> %res3, %res2
- ret <16 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmovs_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovs_dw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmovs.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32>, <16 x i16>, i16)
-
-define <16 x i16>@test_int_x86_avx512_mask_pmovus_dw_512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0:%.*]], <16 x i16> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i16> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSCMP7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> [[X0]], <16 x i16> zeroinitializer, i16 [[X2]])
-; CHECK-NEXT: [[RES3:%.*]] = add <16 x i16> [[RES0]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = add <16 x i16> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i16> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i16> [[RES4]]
-;
- %res0 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 -1)
- %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> %x1, i16 %x2)
- %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovus.dw.512(<16 x i32> %x0, <16 x i16> zeroinitializer, i16 %x2)
- %res3 = add <16 x i16> %res0, %res1
- %res4 = add <16 x i16> %res3, %res2
- ret <16 x i16> %res4
-}
-
-declare void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32>, i16)
-
-define void @test_int_x86_avx512_mask_pmovus_dw_mem_512(ptr %ptr, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pmovus_dw_mem_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 72) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR:%.*]], <16 x i32> [[X1:%.*]], i16 -1)
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSOR4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr [[PTR]], <16 x i32> [[X1]], i16 [[X2:%.*]])
-; CHECK-NEXT: ret void
-;
- call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 -1)
- call void @llvm.x86.avx512.mask.pmovus.dw.mem.512(ptr %ptr, <16 x i32> %x1, i16 %x2)
- ret void
-}
-
-declare <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32>, i32)
-
-define <16 x float>@test_int_x86_avx512_mask_cvt_dq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_dq2ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[CVT:%.*]] = sitofp <16 x i32> [[X0:%.*]] to <16 x float>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 14:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 15:
-; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
-; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %cvt = sitofp <16 x i32> %x0 to <16 x float>
- %1 = bitcast i16 %x2 to <16 x i1>
- %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
- %3 = call <16 x float> @llvm.x86.avx512.sitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
- %res2 = fadd <16 x float> %2, %3
- ret <16 x float> %res2
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
-
-define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2dq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES2]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
- %res2 = add <8 x i32> %res, %res1
- ret <8 x i32> %res2
-}
-
-declare <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double>, <8 x float>, i8, i32)
-
-define <8 x float>@test_int_x86_avx512_mask_cvt_pd2ps_512(<8 x double> %x0, <8 x float> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0:%.*]], <8 x float> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> [[X0]], <8 x float> [[X1]], i8 -1, i32 10)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x float> [[RES2]]
-;
- %res = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 %x2, i32 4)
- %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtpd2ps.512(<8 x double> %x0, <8 x float> %x1, i8 -1, i32 10)
- %res2 = fadd <8 x float> %res, %res1
- ret <8 x float> %res2
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
-
-define <8 x i32>@test_int_x86_avx512_mask_cvt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_pd2udq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 10)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES2]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 10)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvtpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
- %res2 = add <8 x i32> %res, %res1
- ret <8 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float>, <16 x i32>, i16, i32)
-
-define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2dq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
- %res2 = add <16 x i32> %res, %res1
- ret <16 x i32> %res2
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float>, <8 x double>, i8, i32)
-
-define <8 x double>@test_int_x86_avx512_mask_cvt_ps2pd_512(<8 x float> %x0, <8 x double> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i256 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0:%.*]], <8 x double> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP1]] to i256
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i256 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> [[X0]], <8 x double> [[X1]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES2]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 %x2, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtps2pd.512(<8 x float> %x0, <8 x double> %x1, i8 -1, i32 8)
- %res2 = fadd <8 x double> %res, %res1
- ret <8 x double> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float>, <16 x i32>, i16, i32)
-
-define <16 x i32>@test_int_x86_avx512_mask_cvt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ps2udq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 10)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 10)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvtps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
- %res2 = add <16 x i32> %res, %res1
- ret <16 x i32> %res2
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double>, <8 x i32>, i8, i32)
-
-define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2dq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES2]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2dq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
- %res2 = add <8 x i32> %res, %res1
- ret <8 x i32> %res2
-}
-
-declare <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32>, i32)
-
-define <16 x float>@test_int_x86_avx512_mask_cvt_udq2ps_512(<16 x i32> %x0, <16 x float> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_udq2ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[CVT:%.*]] = uitofp <16 x i32> [[X0:%.*]] to <16 x float>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[X2:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP1]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[CVT]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[X1:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP1]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP4]], <16 x i32> [[TMP11]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP5]], <16 x float> [[CVT]], <16 x float> [[X1]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP13]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 14:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 15:
-; CHECK-NEXT: [[TMP16:%.*]] = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> [[X0]], i32 8)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[_MSPROP_SELECT]], zeroinitializer
-; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[TMP12]], [[TMP16]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %cvt = uitofp <16 x i32> %x0 to <16 x float>
- %1 = bitcast i16 %x2 to <16 x i1>
- %2 = select <16 x i1> %1, <16 x float> %cvt, <16 x float> %x1
- %3 = call <16 x float> @llvm.x86.avx512.uitofp.round.v16f32.v16i32(<16 x i32> %x0, i32 8)
- %res2 = fadd <16 x float> %2, %3
- ret <16 x float> %res2
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double>, <8 x i32>, i8, i32)
-
-define <8 x i32>@test_int_x86_avx512_mask_cvtt_pd2udq_512(<8 x double> %x0, <8 x i32> %x1, i8 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_pd2udq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i256 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0:%.*]], <8 x i32> [[X1:%.*]], i8 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP2]] to i256
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i256 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> [[X0]], <8 x i32> [[X1]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i32> [[RES2]]
-;
- %res = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 %x2, i32 4)
- %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttpd2udq.512(<8 x double> %x0, <8 x i32> %x1, i8 -1, i32 8)
- %res2 = add <8 x i32> %res, %res1
- ret <8 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float>, <16 x i32>, i16, i32)
-
-define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2dq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2dq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
- %res2 = add <16 x i32> %res, %res1
- ret <16 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float>, <16 x i32>, i16, i32)
-
-define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16 x i32> %x1, i16 %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvtt_ps2udq_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]], i16 [[X2:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> [[X0]], <16 x i32> [[X1]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 %x2, i32 4)
- %res1 = call <16 x i32> @llvm.x86.avx512.mask.cvttps2udq.512(<16 x float> %x0, <16 x i32> %x1, i16 -1, i32 8)
- %res2 = add <16 x i32> %res, %res1
- ret <16 x i32> %res2
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone
-
-define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_getexp_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 -1, i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 -1, i32 8)
- ret <4 x float> %res
-}
-
-define <4 x float> @test_mask_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_getexp_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES0:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
-; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0]], <4 x float> [[A1]], <4 x float> [[A2]], i8 [[MASK]], i32 8)
-; CHECK-NEXT: [[RES_1:%.*]] = fadd <4 x float> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES_1]]
-;
- %res0 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> %a2, i8 %mask, i32 8)
- %res.1 = fadd <4 x float> %res0, %res1
- ret <4 x float> %res.1
-}
-
-define <4 x float> @test_maskz_getexp_ss(<4 x float> %a0, <4 x float> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_getexp_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <4 x float> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>%a0, <4 x float> %a1, <4 x float> zeroinitializer, i8 %mask, i32 8)
- ret <4 x float> %res
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32) nounwind readnone
-
-define <2 x double> @test_getexp_sd(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_getexp_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 -1, i32 4)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 -1, i32 4)
- ret <2 x double> %res
-}
-
-define <2 x double> @test_mask_getexp_sd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_mask_getexp_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES0:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> [[A2:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSOR10]], [[_MSCMP11]]
-; CHECK-NEXT: br i1 [[_MSOR12]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0]], <2 x double> [[A1]], <2 x double> [[A2]], i8 [[MASK]], i32 8)
-; CHECK-NEXT: [[RES_1:%.*]] = fadd <2 x double> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES_1]]
-;
- %res0 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> %a2, i8 %mask, i32 8)
- %res.1 = fadd <2 x double> %res0, %res1
- ret <2 x double> %res.1
-}
-
-define <2 x double> @test_maskz_getexp_sd(<2 x double> %a0, <2 x double> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_maskz_getexp_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], <2 x double> zeroinitializer, i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.getexp.sd(<2 x double>%a0, <2 x double> %a1, <2 x double> zeroinitializer, i8 %mask, i32 8)
- ret <2 x double> %res
-}
-
-declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
-
-define i8@test_int_x86_avx512_mask_cmp_sd(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 5, i8 [[X3:%.*]], i32 8)
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i8 [[RES4]]
-;
- %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
- ret i8 %res4
-}
-
-define i8@test_int_x86_avx512_mask_cmp_sd_all(<2 x double> %x0, <2 x double> %x1, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_sd_all(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 2, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 3, i8 -1, i32 8)
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 14:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 15:
-; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
-; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
-; CHECK: 18:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 19:
-; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 5, i8 [[X3]], i32 8)
-; CHECK-NEXT: [[TMP20:%.*]] = xor i8 [[RES1]], -1
-; CHECK-NEXT: [[TMP21:%.*]] = xor i8 [[RES2]], -1
-; CHECK-NEXT: [[TMP22:%.*]] = and i8 [[TMP20]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = and i8 0, [[TMP21]]
-; CHECK-NEXT: [[TMP24:%.*]] = or i8 0, [[TMP22]]
-; CHECK-NEXT: [[TMP25:%.*]] = or i8 [[TMP24]], [[TMP23]]
-; CHECK-NEXT: [[RES11:%.*]] = or i8 [[RES1]], [[RES2]]
-; CHECK-NEXT: [[TMP26:%.*]] = xor i8 [[RES3]], -1
-; CHECK-NEXT: [[TMP27:%.*]] = xor i8 [[RES4]], -1
-; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP26]], 0
-; CHECK-NEXT: [[TMP29:%.*]] = and i8 0, [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = or i8 0, [[TMP28]]
-; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP30]], [[TMP29]]
-; CHECK-NEXT: [[RES12:%.*]] = or i8 [[RES3]], [[RES4]]
-; CHECK-NEXT: [[TMP32:%.*]] = xor i8 [[RES11]], -1
-; CHECK-NEXT: [[TMP33:%.*]] = xor i8 [[RES12]], -1
-; CHECK-NEXT: [[TMP34:%.*]] = and i8 [[TMP25]], [[TMP31]]
-; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP32]], [[TMP31]]
-; CHECK-NEXT: [[TMP36:%.*]] = and i8 [[TMP25]], [[TMP33]]
-; CHECK-NEXT: [[TMP37:%.*]] = or i8 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP38:%.*]] = or i8 [[TMP37]], [[TMP36]]
-; CHECK-NEXT: [[RES13:%.*]] = or i8 [[RES11]], [[RES12]]
-; CHECK-NEXT: store i8 [[TMP38]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i8 [[RES13]]
-;
- %res1 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 2, i8 -1, i32 4)
- %res2 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 3, i8 -1, i32 8)
- %res3 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 4, i8 %x3, i32 4)
- %res4 = call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %x0, <2 x double> %x1, i32 5, i8 %x3, i32 8)
-
- %res11 = or i8 %res1, %res2
- %res12 = or i8 %res3, %res4
- %res13 = or i8 %res11, %res12
- ret i8 %res13
-}
-
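The CHECK prologues above follow MSan's strict-check shape: each argument's shadow is bitcast to one wide integer, compared against zero to get a single i1 (the [[_MSCMP]] lines), the per-argument bits are OR'd together ([[_MSOR]]), and the combined flag branches to __msan_warning_noreturn. A minimal C++ sketch of the collapse step; collapseShadow is an assumed name, not the pass's actual helper:

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Collapse a vector shadow to a single "any bit set" flag, e.g.
    // <2 x i64> -> i128, then "icmp ne i128 %s, 0".
    static Value *collapseShadow(IRBuilderBase &IRB, Value *Shadow) {
      unsigned Bits =
          Shadow->getType()->getPrimitiveSizeInBits().getFixedValue();
      Type *IntTy = IRB.getIntNTy(Bits);
      Value *AsInt = IRB.CreateBitCast(Shadow, IntTy);
      return IRB.CreateICmpNE(AsInt, Constant::getNullValue(IntTy));
    }

Per-argument flags are then folded pairwise with CreateOr, which is where the numbered [[_MSOR]]/[[_MSORn]] values come from.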
-declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
-
-define i8@test_int_x86_avx512_mask_cmp_ss(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 3, i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: store i8 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i8 [[RES2]]
-;
- %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 %x3, i32 4)
- ret i8 %res2
-}
-
-
-define i8@test_int_x86_avx512_mask_cmp_ss_all(<4 x float> %x0, <4 x float> %x1, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cmp_ss_all(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES1:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 2, i8 -1, i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES2:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 3, i8 -1, i32 8)
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP13]], 0
-; CHECK-NEXT: [[_MSOR7:%.*]] = or i1 [[_MSCMP5]], [[_MSCMP6]]
-; CHECK-NEXT: [[_MSCMP8:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR9:%.*]] = or i1 [[_MSOR7]], [[_MSCMP8]]
-; CHECK-NEXT: br i1 [[_MSOR9]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 14:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 15:
-; CHECK-NEXT: [[RES3:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 4, i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP10:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP17]], 0
-; CHECK-NEXT: [[_MSOR12:%.*]] = or i1 [[_MSCMP10]], [[_MSCMP11]]
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR14:%.*]] = or i1 [[_MSOR12]], [[_MSCMP13]]
-; CHECK-NEXT: br i1 [[_MSOR14]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
-; CHECK: 18:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 19:
-; CHECK-NEXT: [[RES4:%.*]] = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 5, i8 [[X3]], i32 8)
-; CHECK-NEXT: [[TMP20:%.*]] = and i8 [[RES1]], 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i8 0, [[RES2]]
-; CHECK-NEXT: [[TMP22:%.*]] = or i8 0, [[TMP20]]
-; CHECK-NEXT: [[TMP23:%.*]] = or i8 [[TMP22]], [[TMP21]]
-; CHECK-NEXT: [[RES11:%.*]] = and i8 [[RES1]], [[RES2]]
-; CHECK-NEXT: [[TMP24:%.*]] = and i8 [[RES3]], 0
-; CHECK-NEXT: [[TMP25:%.*]] = and i8 0, [[RES4]]
-; CHECK-NEXT: [[TMP26:%.*]] = or i8 0, [[TMP24]]
-; CHECK-NEXT: [[TMP27:%.*]] = or i8 [[TMP26]], [[TMP25]]
-; CHECK-NEXT: [[RES12:%.*]] = and i8 [[RES3]], [[RES4]]
-; CHECK-NEXT: [[TMP28:%.*]] = and i8 [[TMP23]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[RES11]], [[TMP27]]
-; CHECK-NEXT: [[TMP30:%.*]] = and i8 [[TMP23]], [[RES12]]
-; CHECK-NEXT: [[TMP31:%.*]] = or i8 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP32:%.*]] = or i8 [[TMP31]], [[TMP30]]
-; CHECK-NEXT: [[RES13:%.*]] = and i8 [[RES11]], [[RES12]]
-; CHECK-NEXT: store i8 [[TMP32]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i8 [[RES13]]
-;
- %res1 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 2, i8 -1, i32 4)
- %res2 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 3, i8 -1, i32 8)
- %res3 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 4, i8 %x3, i32 4)
- %res4 = call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %x0, <4 x float> %x1, i32 5, i8 %x3, i32 8)
-
- %res11 = and i8 %res1, %res2
- %res12 = and i8 %res3, %res4
- %res13 = and i8 %res11, %res12
- ret i8 %res13
-}
-
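The bit-twiddling before [[RES11]]/[[RES12]] in the two *_all tests above is MSan's precise shadow propagation for "or" and "and": a fully defined 1 bit forces the corresponding "or" result bit to be defined, and a defined 0 bit does the same for "and". A runnable scalar model of both formulas (V = value, S = shadow, shadow bit 1 = uninitialized):

    #include <cstdint>

    // Shadow of "or": a clean 1 in either operand cleans the result bit, so
    // S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2).
    uint8_t orShadow(uint8_t V1, uint8_t S1, uint8_t V2, uint8_t S2) {
      return (S1 & S2) | (~V1 & S2) | (S1 & ~V2);
    }

    // Shadow of "and": dually, a clean 0 wins, so
    // S = (S1 & S2) | (V1 & S2) | (S1 & V2).
    uint8_t andShadow(uint8_t V1, uint8_t S1, uint8_t V2, uint8_t S2) {
      return (S1 & S2) | (V1 & S2) | (S1 & V2);
    }

In the checks above the intrinsic results carry clean shadow (S1 = S2 = 0), which is why literal zeros appear as operands throughout the xor/and/or chains.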
-declare <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double>, i32, <8 x double>, i8, i32)
-
-define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i8 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0:%.*]], i32 11, <8 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> [[X0]], i32 11, <8 x double> [[X2]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES2]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 -1, i32 8)
- %res2 = fadd <8 x double> %res, %res1
- ret <8 x double> %res2
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float>, i32, <16 x float>, i16, i32)
-
-define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i16 [[TMP3]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0:%.*]], i32 11, <16 x float> [[X2:%.*]], i16 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i512 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP5:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR6:%.*]] = or i1 [[_MSCMP4]], [[_MSCMP5]]
-; CHECK-NEXT: br i1 [[_MSOR6]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 10:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 11:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> [[X0]], i32 11, <16 x float> [[X2]], i16 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 -1, i32 8)
- %res2 = fadd <16 x float> %res, %res1
- ret <16 x float> %res2
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double>, <2 x double>, i32, <2 x double>, i8, i32)
-
-define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], i32 11, <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 12, <2 x double> zeroinitializer, i8 [[X3]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
-; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 13, <2 x double> [[X2]], i8 [[X3]], i32 8)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i128 [[TMP19]], 0
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP20]], 0
-; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSCMP18]], [[_MSCMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP21:%.*]] = icmp ne i128 [[TMP21]], 0
-; CHECK-NEXT: [[_MSOR22:%.*]] = or i1 [[_MSOR20]], [[_MSCMP21]]
-; CHECK-NEXT: br i1 [[_MSOR22]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
-; CHECK: 22:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 23:
-; CHECK-NEXT: [[RES3:%.*]] = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> [[X0]], <2 x double> [[X1]], i32 14, <2 x double> [[X2]], i8 -1, i32 4)
-; CHECK-NEXT: [[RES11:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES12:%.*]] = fadd <2 x double> [[RES2]], [[RES3]]
-; CHECK-NEXT: [[RES13:%.*]] = fadd <2 x double> [[RES11]], [[RES12]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES13]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 11, <2 x double> %x2, i8 %x3, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 12, <2 x double> zeroinitializer, i8 %x3, i32 4)
- %res2 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 13, <2 x double> %x2, i8 %x3, i32 8)
- %res3 = call <2 x double> @llvm.x86.avx512.mask.getmant.sd(<2 x double> %x0, <2 x double> %x1, i32 14, <2 x double> %x2, i8 -1, i32 4)
- %res11 = fadd <2 x double> %res, %res1
- %res12 = fadd <2 x double> %res2, %res3
- %res13 = fadd <2 x double> %res11, %res12
- ret <2 x double> %res13
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float>, <4 x float>, i32, <4 x float>, i8, i32)
-
-define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], i32 11, <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 12, <4 x float> zeroinitializer, i8 [[X3]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 13, <4 x float> [[X2]], i8 -1, i32 8)
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i128 [[TMP19]], 0
-; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i128 [[TMP20]], 0
-; CHECK-NEXT: [[_MSOR18:%.*]] = or i1 [[_MSCMP16]], [[_MSCMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i128 [[TMP21]], 0
-; CHECK-NEXT: [[_MSOR20:%.*]] = or i1 [[_MSOR18]], [[_MSCMP19]]
-; CHECK-NEXT: br i1 [[_MSOR20]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
-; CHECK: 22:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 23:
-; CHECK-NEXT: [[RES3:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0]], <4 x float> [[X1]], i32 14, <4 x float> [[X2]], i8 -1, i32 4)
-; CHECK-NEXT: [[RES11:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES12:%.*]] = fadd <4 x float> [[RES2]], [[RES3]]
-; CHECK-NEXT: [[RES13:%.*]] = fadd <4 x float> [[RES11]], [[RES12]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES13]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> %x2, i8 %x3, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 12, <4 x float> zeroinitializer, i8 %x3, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 13, <4 x float> %x2, i8 -1, i32 8)
- %res3 = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 14, <4 x float> %x2, i8 -1, i32 4)
- %res11 = fadd <4 x float> %res, %res1
- %res12 = fadd <4 x float> %res2, %res3
- %res13 = fadd <4 x float> %res11, %res12
- ret <4 x float> %res13
-}
-
-define <4 x float> @test_int_x86_avx512_mask_getmant_ss_load(<4 x float> %x0, ptr %x1p) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_getmant_ss_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[X1:%.*]] = load <4 x float>, ptr [[X1P:%.*]], align 16
-; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[X1P]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
-; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP8]], 0
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[_MSLD]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1]], i32 11, <4 x float> undef, i8 -1, i32 4)
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES]]
-;
- %x1 = load <4 x float>, ptr %x1p
- %res = call <4 x float> @llvm.x86.avx512.mask.getmant.ss(<4 x float> %x0, <4 x float> %x1, i32 11, <4 x float> undef, i8 -1, i32 4)
- ret <4 x float> %res
-}
-
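The getmant intrinsics have no dedicated handler, so these tests exercise the conservative path: check every argument's shadow up front, then declare the result fully initialized (the "store ... zeroinitializer, ptr @__msan_retval_tls"). A simplified sketch in the visitor's terms; insertShadowCheck, setShadow and getCleanShadow are the pass's member helpers, so this is illustrative rather than a drop-in function:

    void handleUnknownStrict(IntrinsicInst &I) {
      for (Value *Arg : I.args())
        insertShadowCheck(Arg, &I);      // emits the _MSCMP/_MSOR chain and
                                         // the branch to the warning block
      setShadow(&I, getCleanShadow(&I)); // retval shadow := zeroinitializer
    }

Checking the arguments first is what makes the clean result shadow sound: any poisoned input would already have aborted before the intrinsic runs.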
-declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
-
-define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
- ret <8 x double> %res
-}
-
-define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> [[X2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES2]]
-;
- %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> %x2
- ret <8 x double> %res2
-}
-
-define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_maskz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
-; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP13]], <8 x i64> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x double> [[RES]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES2]]
-;
- %res = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x double> %res, <8 x double> zeroinitializer
- ret <8 x double> %res2
-}
-
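The [[_MSPROP_SELECT]] pattern above is MSan's shadow for a per-lane select "r = select c, a, b": where the condition bit is clean, take the chosen operand's shadow; where the condition bit is itself poisoned, the result is poisoned wherever the operands differ or either operand's shadow is set. A runnable scalar model (helper name assumed, shadow bit 1 = uninitialized):

    #include <cstdint>

    uint64_t selectShadow(bool C, bool SC, uint64_t A, uint64_t SA,
                          uint64_t B, uint64_t SB) {
      uint64_t IfCondClean = C ? SA : SB;       // the inner select on %mask
      uint64_t IfCondDirty = (A ^ B) | SA | SB; // the xor + two ors above
      return SC ? IfCondDirty : IfCondClean;    // the final _MSPROP_SELECT
    }

Note the clean branch selects on the mask's value while the outer select keys on the mask's shadow, exactly as in the generated IR.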
-declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
- ret <16 x float> %res
-}
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
- ret <16 x float> %res2
-}
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_maskz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP13]], <16 x i32> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
- ret <16 x float> %res2
-}
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
-; CHECK: 3:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 4:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
- ret <16 x float> %res
-}
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_mask(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP13]], <16 x i32> [[TMP8]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> [[X2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> %x2
- ret <16 x float> %res2
-}
-
-define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_constant_pool_maskz(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0:%.*]], <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x float> [[RES]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES2]]
-;
- %res = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x float> %res, <16 x float> zeroinitializer
- ret <16 x float> %res2
-}
-
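In the constant-pool variants above only %x0's shadow is checked: the literal <16 x i32> index vector is a Constant, and constants carry an all-zero shadow, so that operand contributes nothing to the _MSOR chain. A sketch under that assumption; lookupShadowMap is a hypothetical stand-in for the visitor's shadow map:

    // Constants never carry shadow; everything else is looked up.
    Value *getShadowFor(Value *V) {
      if (isa<Constant>(V))
        return getCleanShadow(V); // zeroinitializer of the shadow type
      return lookupShadowMap(V);  // hypothetical: shadow recorded earlier
    }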
-declare <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double>, <4 x float>, <2 x double>, i8, i32)
-
-define <2 x double>@test_int_x86_avx512_mask_cvt_ss2sd_round(<2 x double> %x0,<4 x float> %x1, <2 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_ss2sd_round(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0:%.*]], <4 x float> [[X1:%.*]], <2 x double> [[X2:%.*]], i8 [[X3:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> [[X0]], <4 x float> [[X1]], <2 x double> [[X2]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES2]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 %x3, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtss2sd.round(<2 x double> %x0, <4 x float> %x1, <2 x double> %x2, i8 -1, i32 8)
- %res2 = fadd <2 x double> %res, %res1
- ret <2 x double> %res2
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float>, <2 x double>, <4 x float>, i8, i32)
-
-define <4 x float>@test_int_x86_avx512_mask_cvt_sd2ss_round(<4 x float> %x0,<2 x double> %x1, <4 x float> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_cvt_sd2ss_round(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0:%.*]], <2 x double> [[X1:%.*]], <4 x float> [[X2:%.*]], i8 [[X3:%.*]], i32 11)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i128 [[TMP12]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 14:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> [[X0]], <2 x double> [[X1]], <4 x float> [[X2]], i8 -1, i32 8)
-; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES2]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 %x3, i32 11)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtsd2ss.round(<4 x float> %x0, <2 x double> %x1, <4 x float> %x2, i8 -1, i32 8)
- %res2 = fadd <4 x float> %res, %res1
- ret <4 x float> %res2
-}
-
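The load offsets in every prologue above (0, 16, 32, 48 for the 128-bit-argument tests; 0, 64, 128, 192 for the 512-bit ones) mirror the argument layout in __msan_param_tls: each argument's shadow occupies its size rounded up to an 8-byte slot, laid out in parameter order. A runnable sketch of the offset computation (helper name assumed):

    #include <cstdint>
    #include <vector>

    // Shadow TLS offset of argument ArgNo, given each argument's size in
    // bytes; slots are 8-byte aligned.
    uint64_t paramTlsOffset(const std::vector<uint64_t> &ArgSizes,
                            unsigned ArgNo) {
      uint64_t Off = 0;
      for (unsigned I = 0; I < ArgNo; ++I)
        Off += (ArgSizes[I] + 7) & ~uint64_t(7);
      return Off; // e.g. {16, 8, 16, 1} -> offsets 0, 16, 24, 40
    }

A 512-bit vector takes 64 bytes, which is why the getmant.pd.512 test reads its i8 mask shadow at offset 128, after two <8 x double> arguments.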
-declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32)
-
-define <16 x i32>@test_int_x86_avx512_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_pternlog_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP9]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
- ret <16 x i32> %1
-}
-
-define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> [[TMP1]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], [[X0]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP1]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> [[X0]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP17]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
- %2 = bitcast i16 %x4 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x0
- ret <16 x i32> %3
-}
-
-define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 33)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X4:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP17]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33)
- %2 = bitcast i16 %x4 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
- ret <16 x i32> %3
-}
-
-declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32)
-
-define <8 x i64>@test_int_x86_avx512_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_pternlog_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP9]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
- ret <8 x i64> %1
-}
-
-define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_pternlog_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> [[TMP1]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], [[X0]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP1]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> [[X0]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP17]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
- %2 = bitcast i8 %x4 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x0
- ret <8 x i64> %3
-}
-
-define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_pternlog_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: br i1 [[_MSOR3]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 33)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X4:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[TMP10]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP17]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33)
- %2 = bitcast i8 %x4 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
- ret <8 x i64> %3
-}
-
-define i32 @test_x86_avx512_comi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_comi_sd_eq_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 8)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_eq_sae(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 8)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_comi_sd_eq(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 0, i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 0, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_eq(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_ucomi_sd_eq(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 8, i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 8, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_comi_sd_lt_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 8)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_lt_sae(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt_sae(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 8)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 8)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_comi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_comi_sd_lt(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 1, i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 1, i32 4)
- ret i32 %res
-}
-
-define i32 @test_x86_avx512_ucomi_sd_lt(<2 x double> %a0, <2 x double> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_ucomi_sd_lt(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[A0:%.*]], <2 x double> [[A1:%.*]], i32 9, i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %a0, <2 x double> %a1, i32 9, i32 4)
- ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
-
-define i32 @test_x86_avx512_ucomi_ss_lt(<4 x float> %a0, <4 x float> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_ucomi_ss_lt(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[RES:%.*]] = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], i32 9, i32 4)
-; CHECK-NEXT: store i32 0, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret i32 [[RES]]
-;
- %res = call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %a0, <4 x float> %a1, i32 9, i32 4)
- ret i32 %res
-}
-
-declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
-
-declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
-
-define <8 x double>@test_int_x86_avx512_permvar_df_512(<8 x double> %x0, <8 x i64> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_permvar_df_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP7]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
- ret <8 x double> %1
-}
-
-define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_df_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> zeroinitializer, <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[X2:%.*]] to <8 x i64>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <8 x i64> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <8 x i64> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> [[TMP17]], <8 x i64> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <8 x i1> [[TMP11]], <8 x double> [[TMP9]], <8 x double> [[X2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP18]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %x2
- ret <8 x double> %3
-}
-
-define <8 x double>@test_int_x86_avx512_maskz_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_df_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP10]], <8 x i64> zeroinitializer, <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP15]], <8 x i64> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <8 x i1> [[TMP10]], <8 x double> [[TMP8]], <8 x double> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[TMP16]]
-;
- %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %x0, <8 x i64> %x1)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> zeroinitializer
- ret <8 x double> %3
-}
-
-declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
-
-define <8 x i64>@test_int_x86_avx512_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_permvar_di_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP3]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
- ret <8 x i64> %1
-}
-
-define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_di_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[_MSPROP]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = xor <8 x i64> [[TMP5]], [[X2:%.*]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP11]], <8 x i64> [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[TMP7]], <8 x i64> [[TMP5]], <8 x i64> [[X2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP12]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x2
- ret <8 x i64> %3
-}
-
-define <8 x i64>@test_int_x86_avx512_maskz_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_di_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1:%.*]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[_MSPROP]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[TMP11]]
-;
- %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1)
- %2 = bitcast i8 %x3 to <8 x i1>
- %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> zeroinitializer
- ret <8 x i64> %3
-}
-
-declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
-
-define <16 x float>@test_int_x86_avx512_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_permvar_sf_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP7]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
- ret <16 x float> %1
-}
-
-define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_sf_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[X2:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
-; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[X2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP18]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %x2
- ret <16 x float> %3
-}
-
-define <16 x float>@test_int_x86_avx512_maskz_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_sf_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP16]]
-;
- %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> zeroinitializer
- ret <16 x float> %3
-}
-
-declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
-
-define <16 x i32>@test_int_x86_avx512_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_permvar_si_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP3]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
- ret <16 x i32> %1
-}
-
-define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_permvar_si_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP8:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[_MSPROP]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = xor <16 x i32> [[TMP5]], [[X2:%.*]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP11]], <16 x i32> [[TMP8]]
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP7]], <16 x i32> [[TMP5]], <16 x i32> [[X2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP12]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
- ret <16 x i32> %3
-}
-
-define <16 x i32>@test_int_x86_avx512_maskz_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_permvar_si_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1:%.*]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[_MSPROP]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[_MSPROP]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[TMP11]]
-;
- %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1)
- %2 = bitcast i16 %x3 to <16 x i1>
- %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> zeroinitializer
- ret <16 x i32> %3
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
-
-define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 4, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> [[X1]], <8 x i64> [[X2]], i32 5, i8 [[X4]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 3, i8 -1, i32 8)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES4]]
-;
- %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 4, i8 %x4, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> zeroinitializer, <8 x double> %x1, <8 x i64> %x2, i32 5, i8 %x4, i32 4)
- %res2 = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 8)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res3, %res2
- ret <8 x double> %res4
-}
-
-define <8 x double>@test_int_x86_avx512_mask_fixupimm_pd_512_load(<8 x double> %x0, <8 x double> %x1, ptr %x2ptr) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_pd_512_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[X2:%.*]] = load <8 x i64>, ptr [[X2PTR:%.*]], align 64
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <8 x i64>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2]], i32 3, i8 -1, i32 4)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %x2 = load <8 x i64>, ptr %x2ptr
- %res = call <8 x double> @llvm.x86.avx512.mask.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 -1, i32 4)
- ret <8 x double> %res
-}
-
-declare <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double>, <8 x double>, <8 x i64>, i32, i8, i32)
-
-define <8 x double>@test_int_x86_avx512_maskz_fixupimm_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_pd_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0:%.*]], <8 x double> [[X1:%.*]], <8 x i64> [[X2:%.*]], i32 3, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> [[X0]], <8 x double> [[X1]], <8 x i64> [[X2]], i32 2, i8 -1, i32 8)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <8 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <8 x double> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES4]]
-;
- %res = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 3, i8 %x4, i32 4)
- %res1 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> zeroinitializer, i32 5, i8 %x4, i32 4)
- %res2 = call <8 x double> @llvm.x86.avx512.maskz.fixupimm.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x i64> %x2, i32 2, i8 -1, i32 8)
- %res3 = fadd <8 x double> %res, %res1
- %res4 = fadd <8 x double> %res3, %res2
- ret <8 x double> %res4
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
-
-define <4 x float>@test_int_x86_avx512_mask_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 5, i8 -1, i32 8)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %res = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 4)
- %res2 = call <4 x float> @llvm.x86.avx512.mask.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 -1, i32 8)
- %res3 = fadd <4 x float> %res, %res1
- %res4 = fadd <4 x float> %res3, %res2
- ret <4 x float> %res4
-}
-
-declare <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float>, <4 x float>, <4 x i32>, i32, i8, i32)
-
-define <4 x float>@test_int_x86_avx512_maskz_fixupimm_ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0:%.*]], <4 x float> [[X1:%.*]], <4 x i32> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> zeroinitializer, i32 5, i8 [[X4]], i32 8)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <4 x i32> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> [[X0]], <4 x float> [[X1]], <4 x i32> [[X2]], i32 6, i8 -1, i32 4)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %res = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 5, i8 %x4, i32 4)
- %res1 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> zeroinitializer, i32 5, i8 %x4, i32 8)
- %res2 = call <4 x float> @llvm.x86.avx512.maskz.fixupimm.ss(<4 x float> %x0, <4 x float> %x1, <4 x i32> %x2, i32 6, i8 -1, i32 4)
- %res3 = fadd <4 x float> %res, %res1
- %res4 = fadd <4 x float> %res3, %res2
- ret <4 x float> %res4
-}
-
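-; 512-bit packed fixupimm: each <16 x i32> operand shadow is bitcast to i512,
-; compared against zero, and OR-combined into a single branch to
-; __msan_warning_noreturn.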
-declare <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
-
-define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 5, i16 [[X4]], i32 4)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 5, i16 -1, i32 8)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES4]]
-;
- %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
- %res1 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 5, i16 %x4, i32 4)
- %res2 = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 8)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res3, %res2
- ret <16 x float> %res4
-}
-
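-; Here %x2 comes from memory: MSan first checks the pointer's own shadow, then
-; computes the shadow address (application address XOR 0x500000000000, i.e.
-; 87960930222080) and loads the operand shadow (_MSLD) from it.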
-define <16 x float>@test_int_x86_avx512_mask_fixupimm_ps_512_load(<16 x float> %x0, <16 x float> %x1, ptr %x2ptr) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_ps_512_load(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2PTR:%.*]], align 64
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2PTR]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP9]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP1]], [[_MSCMP2]]
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[_MSLD]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSOR]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2]], i32 5, i16 -1, i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %x2 = load <16 x i32>, ptr %x2ptr
- %res = call <16 x float> @llvm.x86.avx512.mask.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 -1, i32 4)
- ret <16 x float> %res
-}
-
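-; maskz form of the 512-bit test; identical shadow checks with an i16 mask.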
-declare <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float>, <16 x float>, <16 x i32>, i32, i16, i32)
-
-define <16 x float>@test_int_x86_avx512_maskz_fixupimm_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i16 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_ps_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i16 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0:%.*]], <16 x float> [[X1:%.*]], <16 x i32> [[X2:%.*]], i32 5, i16 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i512 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i512 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i16 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> zeroinitializer, i32 6, i16 [[X4]], i32 8)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i512 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i512 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <16 x i32> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i512 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> [[X0]], <16 x float> [[X1]], <16 x i32> [[X2]], i32 7, i16 -1, i32 4)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <16 x float> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <16 x float> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES4]]
-;
- %res = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 5, i16 %x4, i32 4)
- %res1 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> zeroinitializer, i32 6, i16 %x4, i32 8)
- %res2 = call <16 x float> @llvm.x86.avx512.maskz.fixupimm.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x i32> %x2, i32 7, i16 -1, i32 4)
- %res3 = fadd <16 x float> %res, %res1
- %res4 = fadd <16 x float> %res3, %res2
- ret <16 x float> %res4
-}
-
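-; Scalar double-precision fixupimm: the <2 x i64> operand shadows are bitcast
-; to i128 for the strict checks.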
-declare <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
-
-define <2 x double>@test_int_x86_avx512_mask_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_fixupimm_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: br i1 [[_MSOR15]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 6, i8 -1, i32 4)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %res = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
- %res2 = call <2 x double> @llvm.x86.avx512.mask.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 6, i8 -1, i32 4)
- %res3 = fadd <2 x double> %res, %res1
- %res4 = fadd <2 x double> %res3, %res2
- ret <2 x double> %res4
-}
-
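-; maskz scalar-double variant. Note that %res2 below keeps the variable mask
-; %x4, so its shadow is checked a third time (_MSCMP16) before the final call.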
-declare <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double>, <2 x double>, <2 x i64>, i32, i8, i32)
-
-define <2 x double>@test_int_x86_avx512_maskz_fixupimm_sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i8 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_fixupimm_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i128 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i128 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i128 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR3:%.*]] = or i1 [[_MSOR]], [[_MSCMP2]]
-; CHECK-NEXT: [[_MSCMP4:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR5:%.*]] = or i1 [[_MSOR3]], [[_MSCMP4]]
-; CHECK-NEXT: br i1 [[_MSOR5]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0:%.*]], <2 x double> [[X1:%.*]], <2 x i64> [[X2:%.*]], i32 5, i8 [[X4:%.*]], i32 4)
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP6:%.*]] = icmp ne i128 [[TMP10]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP7:%.*]] = icmp ne i128 [[TMP11]], 0
-; CHECK-NEXT: [[_MSOR8:%.*]] = or i1 [[_MSCMP6]], [[_MSCMP7]]
-; CHECK-NEXT: [[_MSCMP9:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR10:%.*]] = or i1 [[_MSOR8]], [[_MSCMP9]]
-; CHECK-NEXT: br i1 [[_MSOR10]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 13:
-; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> zeroinitializer, i32 5, i8 [[X4]], i32 8)
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP1]] to i128
-; CHECK-NEXT: [[_MSCMP11:%.*]] = icmp ne i128 [[TMP14]], 0
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[_MSCMP12:%.*]] = icmp ne i128 [[TMP15]], 0
-; CHECK-NEXT: [[_MSOR13:%.*]] = or i1 [[_MSCMP11]], [[_MSCMP12]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast <2 x i64> [[TMP3]] to i128
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i128 [[TMP16]], 0
-; CHECK-NEXT: [[_MSOR15:%.*]] = or i1 [[_MSOR13]], [[_MSCMP14]]
-; CHECK-NEXT: [[_MSCMP16:%.*]] = icmp ne i8 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR17:%.*]] = or i1 [[_MSOR15]], [[_MSCMP16]]
-; CHECK-NEXT: br i1 [[_MSOR17]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
-; CHECK: 17:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 18:
-; CHECK-NEXT: [[RES2:%.*]] = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> [[X0]], <2 x double> [[X1]], <2 x i64> [[X2]], i32 5, i8 [[X4]], i32 8)
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[RES]], [[RES1]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[RES3]], [[RES2]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %res = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 4)
- %res1 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> zeroinitializer, i32 5, i8 %x4, i32 8)
- %res2 = call <2 x double> @llvm.x86.avx512.maskz.fixupimm.sd(<2 x double> %x0, <2 x double> %x1, <2 x i64> %x2, i32 5, i8 %x4, i32 8)
- %res3 = fadd <2 x double> %res, %res1
- %res4 = fadd <2 x double> %res3, %res2
- ret <2 x double> %res4
-}
-
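-; The vfmadd tests pair a plain llvm.fma.f64 call with the AVX512 rounding
-; variant llvm.x86.avx512.vfmadd.f64. For llvm.fma, MSan propagates shadow
-; (_MSPROP = OR of the operand shadows); for the x86 intrinsic it strictly
-; checks each scalar operand shadow and branches on any set bit.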
-declare double @llvm.fma.f64(double, double, double) #1
-declare double @llvm.x86.avx512.vfmadd.f64(double, double, double, i32) #0
-
-define <2 x double> @test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP5]] to i64
-; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP5]]
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X0]], double [[TMP18]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
-; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
-; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
-; CHECK: 23:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 24:
-; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
-; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
-; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 31:
-; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP11]]
-; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
-; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP27]] to i64
-; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
-; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP11]]
-; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP27]]
-; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT15]], i64 0
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X0]], double [[TMP42]], i64 0
-; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
-; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
-; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %1 = extractelement <2 x double> %x0, i64 0
- %2 = extractelement <2 x double> %x1, i64 0
- %3 = extractelement <2 x double> %x2, i64 0
- %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, double %4, double %1
- %8 = insertelement <2 x double> %x0, double %7, i64 0
- %9 = extractelement <2 x double> %x0, i64 0
- %10 = extractelement <2 x double> %x1, i64 0
- %11 = extractelement <2 x double> %x2, i64 0
- %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
- %13 = insertelement <2 x double> %x0, double %12, i64 0
- %14 = extractelement <2 x double> %x0, i64 0
- %15 = extractelement <2 x double> %x1, i64 0
- %16 = extractelement <2 x double> %x2, i64 0
- %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
- %18 = bitcast i8 %x3 to <8 x i1>
- %19 = extractelement <8 x i1> %18, i64 0
- %20 = select i1 %19, double %17, double %14
- %21 = insertelement <2 x double> %x0, double %20, i64 0
- %res3 = fadd <2 x double> %8, %13
- %res4 = fadd <2 x double> %21, %res3
- ret <2 x double> %res4
-}
-
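-; Single-precision counterpart of the previous test; the shadow arithmetic is
-; performed on i32 instead of i64.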
-define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP5]] to i32
-; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP5]]
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
-; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
-; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
-; CHECK: 23:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 24:
-; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
-; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
-; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 31:
-; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP11]]
-; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
-; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP27]] to i32
-; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP11]]
-; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP27]]
-; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT15]], i64 0
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X0]], float [[TMP42]], i64 0
-; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
-; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
-; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %x1, i64 0
- %3 = extractelement <4 x float> %x2, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float %1
- %8 = insertelement <4 x float> %x0, float %7, i64 0
- %9 = extractelement <4 x float> %x0, i64 0
- %10 = extractelement <4 x float> %x1, i64 0
- %11 = extractelement <4 x float> %x2, i64 0
- %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
- %13 = insertelement <4 x float> %x0, float %12, i64 0
- %14 = extractelement <4 x float> %x0, i64 0
- %15 = extractelement <4 x float> %x1, i64 0
- %16 = extractelement <4 x float> %x2, i64 0
- %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
- %18 = bitcast i8 %x3 to <8 x i1>
- %19 = extractelement <8 x i1> %18, i64 0
- %20 = select i1 %19, float %17, float %14
- %21 = insertelement <4 x float> %x0, float %20, i64 0
- %res3 = fadd <4 x float> %8, %13
- %res4 = fadd <4 x float> %21, %res3
- ret <4 x float> %res4
-}
-
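-; This define omits the #0 attribute set, so the function is not instrumented
-; for parameter shadow: there are no __msan_param_tls loads, all incoming
-; shadows are treated as clean, and the select-shadow computation folds to
-; constants (select i1 false, ...).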
-define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_sd(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i64 0, i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast double [[TMP4]] to i64
-; CHECK-NEXT: [[TMP9:%.*]] = xor i64 [[TMP8]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = or i64 [[TMP9]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP10]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i64 [[TMP11]], i64 [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <2 x double> [[X0]], double [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]], i32 11)
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i64 0, i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast double [[TMP17]] to i64
-; CHECK-NEXT: [[TMP22:%.*]] = xor i64 [[TMP21]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP22]], 0
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select i1 false, i64 [[TMP24]], i64 [[TMP20]]
-; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], double [[TMP17]], double 0.000000e+00
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X0]], double [[TMP25]], i64 0
-; CHECK-NEXT: [[RES2:%.*]] = fadd <2 x double> [[TMP13]], [[TMP26]]
-; CHECK-NEXT: store <2 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES2]]
-;
- %1 = extractelement <2 x double> %x0, i64 0
- %2 = extractelement <2 x double> %x1, i64 0
- %3 = extractelement <2 x double> %x2, i64 0
- %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, double %4, double 0.000000e+00
- %8 = insertelement <2 x double> %x0, double %7, i64 0
- %9 = extractelement <2 x double> %x0, i64 0
- %10 = extractelement <2 x double> %x1, i64 0
- %11 = extractelement <2 x double> %x2, i64 0
- %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
- %13 = bitcast i8 %x3 to <8 x i1>
- %14 = extractelement <8 x i1> %13, i64 0
- %15 = select i1 %14, double %12, double 0.000000e+00
- %16 = insertelement <2 x double> %x0, double %15, i64 0
- %res2 = fadd <2 x double> %8, %16
- ret <2 x double> %res2
-}
-
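-; f32 declarations for the scalar FMA intrinsics; the maskz_vfmadd_ss test
-; below likewise omits #0 and therefore sees all-clean shadows.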
-declare float @llvm.fma.f32(float, float, float) #1
-declare float @llvm.x86.avx512.vfmadd.f32(float, float, float, i32) #0
-
-define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], i32 0, i32 0
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast float [[TMP4]] to i32
-; CHECK-NEXT: [[TMP9:%.*]] = xor i32 [[TMP8]], 0
-; CHECK-NEXT: [[TMP10:%.*]] = or i32 [[TMP9]], 0
-; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP10]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP11]], i32 [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
-; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x float> [[X0]], float [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]], i32 11)
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 0, i32 0
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP17]] to i32
-; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], 0
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT1:%.*]] = select i1 false, i32 [[TMP24]], i32 [[TMP20]]
-; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP17]], float 0.000000e+00
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X0]], float [[TMP25]], i64 0
-; CHECK-NEXT: [[RES2:%.*]] = fadd <4 x float> [[TMP13]], [[TMP26]]
-; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES2]]
-;
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %x1, i64 0
- %3 = extractelement <4 x float> %x2, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float 0.000000e+00
- %8 = insertelement <4 x float> %x0, float %7, i64 0
- %9 = extractelement <4 x float> %x0, i64 0
- %10 = extractelement <4 x float> %x1, i64 0
- %11 = extractelement <4 x float> %x2, i64 0
- %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
- %13 = bitcast i8 %x3 to <8 x i1>
- %14 = extractelement <8 x i1> %13, i64 0
- %15 = select i1 %14, float %12, float 0.000000e+00
- %16 = insertelement <4 x float> %x0, float %15, i64 0
- %res2 = fadd <4 x float> %8, %16
- ret <4 x float> %res2
-}
-
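-; Mixed sources: the vector operand is loaded from memory (shadow via the XOR
-; mapping, _MSLD) while the float arguments %2 and %3 arrive through
-; __msan_param_tls.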
-define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_load0(i8 zeroext %0, ptr nocapture readonly %1, float %2, float %3) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_load0(
-; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 24) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[TMP11:%.*]] = load <4 x float>, ptr [[TMP1:%.*]], align 16
-; CHECK-NEXT: [[TMP12:%.*]] = ptrtoint ptr [[TMP1]] to i64
-; CHECK-NEXT: [[TMP13:%.*]] = xor i64 [[TMP12]], 87960930222080
-; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP14]], align 16
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[TMP11]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = or i32 [[_MSPROP]], [[TMP6]]
-; CHECK-NEXT: [[_MSPROP2:%.*]] = or i32 [[_MSPROP1]], [[TMP7]]
-; CHECK-NEXT: [[TMP16:%.*]] = tail call float @llvm.fma.f32(float [[TMP15]], float [[TMP2:%.*]], float [[TMP3:%.*]])
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[TMP8]] to <8 x i1>
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP0:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <8 x i1> [[TMP17]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP19]], i32 [[_MSPROP2]], i32 0
-; CHECK-NEXT: [[TMP21:%.*]] = bitcast float [[TMP16]] to i32
-; CHECK-NEXT: [[TMP22:%.*]] = xor i32 [[TMP21]], 0
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP22]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP3]], i32 [[TMP24]], i32 [[TMP20]]
-; CHECK-NEXT: [[TMP25:%.*]] = select i1 [[TMP19]], float [[TMP16]], float 0.000000e+00
-; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <4 x i32> [[_MSLD]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[TMP11]], float [[TMP25]], i64 0
-; CHECK-NEXT: store <4 x i32> [[_MSPROP4]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[TMP26]]
-;
- %5 = load <4 x float>, ptr %1, align 16
- %6 = extractelement <4 x float> %5, i64 0
- %7 = tail call float @llvm.fma.f32(float %6, float %2, float %3) #2
- %8 = bitcast i8 %0 to <8 x i1>
- %9 = extractelement <8 x i1> %8, i64 0
- %10 = select i1 %9, float %7, float 0.000000e+00
- %11 = insertelement <4 x float> %5, float %10, i64 0
- ret <4 x float> %11
-}
-
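-; mask3 form: when the mask bit is false the result lane comes from %x2, so
-; the shadow select falls back to operand 2's shadow (_MSPROP2) rather than
-; operand 0's, and the result is inserted into %x2's shadow vector.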
-define <2 x double> @test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = call double @llvm.fma.f64(double [[TMP5]], double [[TMP6]], double [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i64 [[_MSPROP4]], i64 [[_MSPROP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast double [[TMP8]] to i64
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast double [[TMP7]] to i64
-; CHECK-NEXT: [[TMP15:%.*]] = xor i64 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP15]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP16]], [[_MSPROP2]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i64 [[TMP17]], i64 [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], double [[TMP8]], double [[TMP7]]
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x double> [[X2]], double [[TMP18]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP7]], 0
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i64 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
-; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i64 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
-; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
-; CHECK: 23:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 24:
-; CHECK-NEXT: [[TMP25:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP20]], double [[TMP21]], double [[TMP22]], i32 11)
-; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <2 x i64> [[TMP3]], i64 0, i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <2 x double> [[X2]], double [[TMP25]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP11]], 0
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP12]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
-; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 31:
-; CHECK-NEXT: [[TMP32:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP27]], double [[TMP28]], double [[TMP29]], i32 10)
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i64 0, i64 [[_MSPROP13]]
-; CHECK-NEXT: [[TMP37:%.*]] = bitcast double [[TMP32]] to i64
-; CHECK-NEXT: [[TMP38:%.*]] = bitcast double [[TMP29]] to i64
-; CHECK-NEXT: [[TMP39:%.*]] = xor i64 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP39]], 0
-; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP40]], [[_MSPROP13]]
-; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i64 [[TMP41]], i64 [[TMP36]]
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], double [[TMP32]], double [[TMP29]]
-; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <2 x i64> [[TMP3]], i64 [[_MSPROP_SELECT15]], i64 0
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x double> [[X2]], double [[TMP42]], i64 0
-; CHECK-NEXT: [[_MSPROP17:%.*]] = or <2 x i64> [[_MSPROP6]], [[_MSPROP10]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP19]], [[TMP26]]
-; CHECK-NEXT: [[_MSPROP18:%.*]] = or <2 x i64> [[_MSPROP16]], [[_MSPROP17]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP43]], [[RES3]]
-; CHECK-NEXT: store <2 x i64> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %1 = extractelement <2 x double> %x0, i64 0
- %2 = extractelement <2 x double> %x1, i64 0
- %3 = extractelement <2 x double> %x2, i64 0
- %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, double %4, double %3
- %8 = insertelement <2 x double> %x2, double %7, i64 0
- %9 = extractelement <2 x double> %x0, i64 0
- %10 = extractelement <2 x double> %x1, i64 0
- %11 = extractelement <2 x double> %x2, i64 0
- %12 = call double @llvm.x86.avx512.vfmadd.f64(double %9, double %10, double %11, i32 11)
- %13 = insertelement <2 x double> %x2, double %12, i64 0
- %14 = extractelement <2 x double> %x0, i64 0
- %15 = extractelement <2 x double> %x1, i64 0
- %16 = extractelement <2 x double> %x2, i64 0
- %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 10)
- %18 = bitcast i8 %x3 to <8 x i1>
- %19 = extractelement <8 x i1> %18, i64 0
- %20 = select i1 %19, double %17, double %16
- %21 = insertelement <2 x double> %x2, double %20, i64 0
- %res3 = fadd <2 x double> %8, %13
- %res4 = fadd <2 x double> %21, %res3
- ret <2 x double> %res4
-}
-
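-; Single-precision mask3 variant; identical shadow logic on i32 lanes.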
-define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X2:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.fma.f32(float [[TMP5]], float [[TMP6]], float [[TMP7]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <8 x i1> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <8 x i1> [[TMP10]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP11]], i32 [[_MSPROP4]], i32 [[_MSPROP2]]
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast float [[TMP8]] to i32
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP7]] to i32
-; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], [[_MSPROP2]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP5]], i32 [[TMP17]], i32 [[TMP12]]
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 [[TMP11]], float [[TMP8]], float [[TMP7]]
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X2]], float [[TMP18]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP7]], 0
-; CHECK-NEXT: [[_MSCMP19:%.*]] = icmp ne i32 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP19]]
-; CHECK-NEXT: [[_MSCMP20:%.*]] = icmp ne i32 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR21:%.*]] = or i1 [[_MSOR]], [[_MSCMP20]]
-; CHECK-NEXT: br i1 [[_MSOR21]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
-; CHECK: 23:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 24:
-; CHECK-NEXT: [[TMP25:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP20]], float [[TMP21]], float [[TMP22]], i32 11)
-; CHECK-NEXT: [[_MSPROP10:%.*]] = insertelement <4 x i32> [[TMP3]], i32 0, i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x float> [[X2]], float [[TMP25]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP11]], 0
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP12]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSCMP22]], [[_MSCMP23]]
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSOR26:%.*]] = or i1 [[_MSOR24]], [[_MSCMP25]]
-; CHECK-NEXT: br i1 [[_MSOR26]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 31:
-; CHECK-NEXT: [[TMP32:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP27]], float [[TMP28]], float [[TMP29]], i32 10)
-; CHECK-NEXT: [[TMP33:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP34:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP33]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i1> [[TMP34]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], i32 0, i32 [[_MSPROP13]]
-; CHECK-NEXT: [[TMP37:%.*]] = bitcast float [[TMP32]] to i32
-; CHECK-NEXT: [[TMP38:%.*]] = bitcast float [[TMP29]] to i32
-; CHECK-NEXT: [[TMP39:%.*]] = xor i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], 0
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP40]], [[_MSPROP13]]
-; CHECK-NEXT: [[_MSPROP_SELECT15:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP41]], i32 [[TMP36]]
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP35]], float [[TMP32]], float [[TMP29]]
-; CHECK-NEXT: [[_MSPROP16:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT15]], i64 0
-; CHECK-NEXT: [[TMP43:%.*]] = insertelement <4 x float> [[X2]], float [[TMP42]], i64 0
-; CHECK-NEXT: [[_MSPROP17:%.*]] = or <4 x i32> [[_MSPROP6]], [[_MSPROP10]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP19]], [[TMP26]]
-; CHECK-NEXT: [[_MSPROP18:%.*]] = or <4 x i32> [[_MSPROP16]], [[_MSPROP17]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP43]], [[RES3]]
-; CHECK-NEXT: store <4 x i32> [[_MSPROP18]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %x1, i64 0
- %3 = extractelement <4 x float> %x2, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float %3
- %8 = insertelement <4 x float> %x2, float %7, i64 0
- %9 = extractelement <4 x float> %x0, i64 0
- %10 = extractelement <4 x float> %x1, i64 0
- %11 = extractelement <4 x float> %x2, i64 0
- %12 = call float @llvm.x86.avx512.vfmadd.f32(float %9, float %10, float %11, i32 11)
- %13 = insertelement <4 x float> %x2, float %12, i64 0
- %14 = extractelement <4 x float> %x0, i64 0
- %15 = extractelement <4 x float> %x1, i64 0
- %16 = extractelement <4 x float> %x2, i64 0
- %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 10)
- %18 = bitcast i8 %x3 to <8 x i1>
- %19 = extractelement <8 x i1> %18, i64 0
- %20 = select i1 %19, float %17, float %16
- %21 = insertelement <4 x float> %x2, float %20, i64 0
- %res3 = fadd <4 x float> %8, %13
- %res4 = fadd <4 x float> %21, %res3
- ret <4 x float> %res4
-}
-
-define void @fmadd_ss_mask_memfold(ptr %a, ptr %b, i8 %c) #0 {
-; CHECK-LABEL: @fmadd_ss_mask_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
-; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
-; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
-; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
-; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
-; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
-; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
-; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 [[_MSPROP9]]
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP14]] to i32
-; CHECK-NEXT: [[TMP24:%.*]] = xor i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], [[_MSPROP13]]
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP25]], [[_MSPROP9]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP26]], i32 [[TMP21]]
-; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], float [[TMP17]], float [[TMP14]]
-; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x float> [[AV]], float [[TMP27]], i64 0
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
-; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP28]], i32 0
-; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
-; CHECK: 29:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 30:
-; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
-; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
-; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP33]], align 4
-; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4
-; CHECK-NEXT: ret void
-;
- %a.val = load float, ptr %a
- %av0 = insertelement <4 x float> undef, float %a.val, i32 0
- %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
- %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
- %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
-
- %b.val = load float, ptr %b
- %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
- %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
- %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
- %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
- %1 = extractelement <4 x float> %av, i64 0
- %2 = extractelement <4 x float> %bv, i64 0
- %3 = extractelement <4 x float> %av, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %c to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float %1
- %8 = insertelement <4 x float> %av, float %7, i64 0
- %sr = extractelement <4 x float> %8, i32 0
- store float %sr, ptr %a
- ret void
-}
-
-define void @fmadd_ss_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 {
-; CHECK-LABEL: @fmadd_ss_maskz_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A_VAL:%.*]] = load float, ptr [[A:%.*]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[AV0:%.*]] = insertelement <4 x float> undef, float [[A_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <4 x i32> [[_MSPROP]], i32 0, i32 1
-; CHECK-NEXT: [[AV1:%.*]] = insertelement <4 x float> [[AV0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP2:%.*]] = insertelement <4 x i32> [[_MSPROP1]], i32 0, i32 2
-; CHECK-NEXT: [[AV2:%.*]] = insertelement <4 x float> [[AV1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <4 x i32> [[_MSPROP2]], i32 0, i32 3
-; CHECK-NEXT: [[AV:%.*]] = insertelement <4 x float> [[AV2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[_MSCMP17:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP17]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[B_VAL:%.*]] = load float, ptr [[B:%.*]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
-; CHECK-NEXT: [[_MSLD4:%.*]] = load i32, ptr [[TMP13]], align 4
-; CHECK-NEXT: [[_MSPROP5:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD4]], i32 0
-; CHECK-NEXT: [[BV0:%.*]] = insertelement <4 x float> undef, float [[B_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[_MSPROP5]], i32 0, i32 1
-; CHECK-NEXT: [[BV1:%.*]] = insertelement <4 x float> [[BV0]], float 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[_MSPROP6]], i32 0, i32 2
-; CHECK-NEXT: [[BV2:%.*]] = insertelement <4 x float> [[BV1]], float 0.000000e+00, i32 2
-; CHECK-NEXT: [[_MSPROP8:%.*]] = insertelement <4 x i32> [[_MSPROP7]], i32 0, i32 3
-; CHECK-NEXT: [[BV:%.*]] = insertelement <4 x float> [[BV2]], float 0.000000e+00, i32 3
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x float> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[_MSPROP8]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x float> [[BV]], i64 0
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[_MSPROP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x float> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = or i32 [[_MSPROP9]], [[_MSPROP10]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = or i32 [[_MSPROP12]], [[_MSPROP11]]
-; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fma.f32(float [[TMP14]], float [[TMP15]], float [[TMP16]])
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i32 [[_MSPROP13]], i32 0
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast float [[TMP17]] to i32
-; CHECK-NEXT: [[TMP23:%.*]] = xor i32 [[TMP22]], 0
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP23]], [[_MSPROP13]]
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP24]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP14]], i32 [[TMP25]], i32 [[TMP21]]
-; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], float [[TMP17]], float 0.000000e+00
-; CHECK-NEXT: [[_MSPROP15:%.*]] = insertelement <4 x i32> [[_MSPROP3]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = insertelement <4 x float> [[AV]], float [[TMP26]], i64 0
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[_MSPROP15]], i32 0
-; CHECK-NEXT: [[SR:%.*]] = extractelement <4 x float> [[TMP27]], i32 0
-; CHECK-NEXT: [[_MSCMP18:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP18]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
-; CHECK: 28:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 29:
-; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
-; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
-; CHECK-NEXT: store i32 [[_MSPROP16]], ptr [[TMP32]], align 4
-; CHECK-NEXT: store float [[SR]], ptr [[A]], align 4
-; CHECK-NEXT: ret void
-;
- %a.val = load float, ptr %a
- %av0 = insertelement <4 x float> undef, float %a.val, i32 0
- %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
- %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
- %av = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
-
- %b.val = load float, ptr %b
- %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
- %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
- %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
- %bv = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
- %1 = extractelement <4 x float> %av, i64 0
- %2 = extractelement <4 x float> %bv, i64 0
- %3 = extractelement <4 x float> %av, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %c to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float 0.000000e+00
- %8 = insertelement <4 x float> %av, float %7, i64 0
- %sr = extractelement <4 x float> %8, i32 0
- store float %sr, ptr %a
- ret void
-}
-
-define void @fmadd_sd_mask_memfold(ptr %a, ptr %b, i8 %c) #0 {
-; CHECK-LABEL: @fmadd_sd_mask_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
-; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
-; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0
-; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
-; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
-; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
-; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 [[_MSPROP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
-; CHECK-NEXT: [[TMP23:%.*]] = bitcast double [[TMP14]] to i64
-; CHECK-NEXT: [[TMP24:%.*]] = xor i64 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], [[_MSPROP9]]
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP25]], [[_MSPROP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP26]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP27:%.*]] = select i1 [[TMP20]], double [[TMP17]], double [[TMP14]]
-; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x double> [[AV]], double [[TMP27]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
-; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP28]], i32 0
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
-; CHECK: 29:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 30:
-; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP32:%.*]] = xor i64 [[TMP31]], 87960930222080
-; CHECK-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP32]] to ptr
-; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP33]], align 8
-; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8
-; CHECK-NEXT: ret void
-;
- %a.val = load double, ptr %a
- %av0 = insertelement <2 x double> undef, double %a.val, i32 0
- %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
-
- %b.val = load double, ptr %b
- %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
- %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
- %1 = extractelement <2 x double> %av, i64 0
- %2 = extractelement <2 x double> %bv, i64 0
- %3 = extractelement <2 x double> %av, i64 0
- %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
- %5 = bitcast i8 %c to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, double %4, double %1
- %8 = insertelement <2 x double> %av, double %7, i64 0
- %sr = extractelement <2 x double> %8, i32 0
- store double %sr, ptr %a
- ret void
-}
-
-define void @fmadd_sd_maskz_memfold(ptr %a, ptr %b, i8 %c) #0 {
-; CHECK-LABEL: @fmadd_sd_maskz_memfold(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 8) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[A_VAL:%.*]] = load double, ptr [[A:%.*]], align 8
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i64, ptr [[TMP8]], align 8
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD]], i32 0
-; CHECK-NEXT: [[AV0:%.*]] = insertelement <2 x double> undef, double [[A_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = insertelement <2 x i64> [[_MSPROP]], i64 0, i32 1
-; CHECK-NEXT: [[AV:%.*]] = insertelement <2 x double> [[AV0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSCMP13:%.*]] = icmp ne i64 [[TMP2]], 0
-; CHECK-NEXT: br i1 [[_MSCMP13]], label [[TMP9:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 9:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 10:
-; CHECK-NEXT: [[B_VAL:%.*]] = load double, ptr [[B:%.*]], align 8
-; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[B]] to i64
-; CHECK-NEXT: [[TMP12:%.*]] = xor i64 [[TMP11]], 87960930222080
-; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr
-; CHECK-NEXT: [[_MSLD2:%.*]] = load i64, ptr [[TMP13]], align 8
-; CHECK-NEXT: [[_MSPROP3:%.*]] = insertelement <2 x i64> splat (i64 -1), i64 [[_MSLD2]], i32 0
-; CHECK-NEXT: [[BV0:%.*]] = insertelement <2 x double> undef, double [[B_VAL]], i32 0
-; CHECK-NEXT: [[_MSPROP4:%.*]] = insertelement <2 x i64> [[_MSPROP3]], i64 0, i32 1
-; CHECK-NEXT: [[BV:%.*]] = insertelement <2 x double> [[BV0]], double 0.000000e+00, i32 1
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x double> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <2 x i64> [[_MSPROP4]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x double> [[BV]], i64 0
-; CHECK-NEXT: [[_MSPROP7:%.*]] = extractelement <2 x i64> [[_MSPROP1]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x double> [[AV]], i64 0
-; CHECK-NEXT: [[_MSPROP8:%.*]] = or i64 [[_MSPROP5]], [[_MSPROP6]]
-; CHECK-NEXT: [[_MSPROP9:%.*]] = or i64 [[_MSPROP8]], [[_MSPROP7]]
-; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fma.f64(double [[TMP14]], double [[TMP15]], double [[TMP16]])
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast i8 [[C:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <8 x i1> [[TMP18]], i64 0
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i1> [[TMP19]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP20]], i64 [[_MSPROP9]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = bitcast double [[TMP17]] to i64
-; CHECK-NEXT: [[TMP23:%.*]] = xor i64 [[TMP22]], 0
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP23]], [[_MSPROP9]]
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP24]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP10]], i64 [[TMP25]], i64 [[TMP21]]
-; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP20]], double [[TMP17]], double 0.000000e+00
-; CHECK-NEXT: [[_MSPROP11:%.*]] = insertelement <2 x i64> [[_MSPROP1]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = insertelement <2 x double> [[AV]], double [[TMP26]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = extractelement <2 x i64> [[_MSPROP11]], i32 0
-; CHECK-NEXT: [[SR:%.*]] = extractelement <2 x double> [[TMP27]], i32 0
-; CHECK-NEXT: [[_MSCMP14:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP14]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
-; CHECK: 28:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 29:
-; CHECK-NEXT: [[TMP30:%.*]] = ptrtoint ptr [[A]] to i64
-; CHECK-NEXT: [[TMP31:%.*]] = xor i64 [[TMP30]], 87960930222080
-; CHECK-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP31]] to ptr
-; CHECK-NEXT: store i64 [[_MSPROP12]], ptr [[TMP32]], align 8
-; CHECK-NEXT: store double [[SR]], ptr [[A]], align 8
-; CHECK-NEXT: ret void
-;
- %a.val = load double, ptr %a
- %av0 = insertelement <2 x double> undef, double %a.val, i32 0
- %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
-
- %b.val = load double, ptr %b
- %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
- %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
- %1 = extractelement <2 x double> %av, i64 0
- %2 = extractelement <2 x double> %bv, i64 0
- %3 = extractelement <2 x double> %av, i64 0
- %4 = call double @llvm.fma.f64(double %1, double %2, double %3)
- %5 = bitcast i8 %c to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, double %4, double 0.000000e+00
- %8 = insertelement <2 x double> %av, double %7, i64 0
- %sr = extractelement <2 x double> %8, i32 0
- store double %sr, ptr %a
- ret void
-}
-
-define <2 x double> @test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X2:%.*]]
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = call double @llvm.fma.f64(double [[TMP6]], double [[TMP7]], double [[TMP8]])
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast double [[TMP9]] to i64
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = xor i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP17]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP19]], i64 [[TMP14]]
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], double [[TMP9]], double [[TMP10]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x double> [[X2]], double [[TMP20]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = fneg <2 x double> [[X2]]
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP22]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
-; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
-; CHECK: 26:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 27:
-; CHECK-NEXT: [[TMP28:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP23]], double [[TMP24]], double [[TMP25]], i32 11)
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP1]], i64 0, i64 0
-; CHECK-NEXT: [[TMP30:%.*]] = insertelement <2 x double> [[X2]], double [[TMP28]], i64 0
-; CHECK-NEXT: [[TMP31:%.*]] = fneg <2 x double> [[X2]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <2 x double> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[TMP31]], i64 0
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
-; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
-; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
-; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
-; CHECK: 35:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 36:
-; CHECK-NEXT: [[TMP37:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP32]], double [[TMP33]], double [[TMP34]], i32 10)
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i64 0, i64 [[_MSPROP16]]
-; CHECK-NEXT: [[TMP43:%.*]] = bitcast double [[TMP37]] to i64
-; CHECK-NEXT: [[TMP44:%.*]] = bitcast double [[TMP38]] to i64
-; CHECK-NEXT: [[TMP45:%.*]] = xor i64 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP45]], 0
-; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP46]], [[_MSPROP16]]
-; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP47]], i64 [[TMP42]]
-; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], double [[TMP37]], double [[TMP38]]
-; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP1]], i64 [[_MSPROP_SELECT18]], i64 0
-; CHECK-NEXT: [[TMP49:%.*]] = insertelement <2 x double> [[X2]], double [[TMP48]], i64 0
-; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP21]], [[TMP30]]
-; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP49]], [[RES3]]
-; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %1 = fneg <2 x double> %x2
- %2 = extractelement <2 x double> %x0, i64 0
- %3 = extractelement <2 x double> %x1, i64 0
- %4 = extractelement <2 x double> %1, i64 0
- %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
- %6 = extractelement <2 x double> %x2, i64 0
- %7 = bitcast i8 %x3 to <8 x i1>
- %8 = extractelement <8 x i1> %7, i64 0
- %9 = select i1 %8, double %5, double %6
- %10 = insertelement <2 x double> %x2, double %9, i64 0
- %11 = fneg <2 x double> %x2
- %12 = extractelement <2 x double> %x0, i64 0
- %13 = extractelement <2 x double> %x1, i64 0
- %14 = extractelement <2 x double> %11, i64 0
- %15 = call double @llvm.x86.avx512.vfmadd.f64(double %12, double %13, double %14, i32 11)
- %16 = extractelement <2 x double> %x2, i64 0
- %17 = insertelement <2 x double> %x2, double %15, i64 0
- %18 = fneg <2 x double> %x2
- %19 = extractelement <2 x double> %x0, i64 0
- %20 = extractelement <2 x double> %x1, i64 0
- %21 = extractelement <2 x double> %18, i64 0
- %22 = call double @llvm.x86.avx512.vfmadd.f64(double %19, double %20, double %21, i32 10)
- %23 = extractelement <2 x double> %x2, i64 0
- %24 = bitcast i8 %x3 to <8 x i1>
- %25 = extractelement <8 x i1> %24, i64 0
- %26 = select i1 %25, double %22, double %23
- %27 = insertelement <2 x double> %x2, double %26, i64 0
- %res3 = fadd <2 x double> %10, %17
- %res4 = fadd <2 x double> %27, %res3
- ret <2 x double> %res4
-}
-
-define <4 x float> @test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmsub_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X2:%.*]]
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP9:%.*]] = call float @llvm.fma.f32(float [[TMP6]], float [[TMP7]], float [[TMP8]])
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP11]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP13]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast float [[TMP9]] to i32
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
-; CHECK-NEXT: [[TMP17:%.*]] = xor i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP17]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP19]], i32 [[TMP14]]
-; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP13]], float [[TMP9]], float [[TMP10]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x float> [[X2]], float [[TMP20]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = fneg <4 x float> [[X2]]
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP22]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
-; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
-; CHECK: 26:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 27:
-; CHECK-NEXT: [[TMP28:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP23]], float [[TMP24]], float [[TMP25]], i32 11)
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP1]], i32 0, i64 0
-; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[X2]], float [[TMP28]], i64 0
-; CHECK-NEXT: [[TMP31:%.*]] = fneg <4 x float> [[X2]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x float> [[X0]], i64 0
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x float> [[TMP31]], i64 0
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
-; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
-; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
-; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP35:%.*]], label [[TMP36:%.*]], !prof [[PROF1]]
-; CHECK: 35:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 36:
-; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP32]], float [[TMP33]], float [[TMP34]], i32 10)
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[TMP39:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP40:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP39]], i64 0
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i1> [[TMP40]], i64 0
-; CHECK-NEXT: [[TMP42:%.*]] = select i1 [[TMP41]], i32 0, i32 [[_MSPROP16]]
-; CHECK-NEXT: [[TMP43:%.*]] = bitcast float [[TMP37]] to i32
-; CHECK-NEXT: [[TMP44:%.*]] = bitcast float [[TMP38]] to i32
-; CHECK-NEXT: [[TMP45:%.*]] = xor i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], 0
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP46]], [[_MSPROP16]]
-; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP47]], i32 [[TMP42]]
-; CHECK-NEXT: [[TMP48:%.*]] = select i1 [[TMP41]], float [[TMP37]], float [[TMP38]]
-; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[_MSPROP_SELECT18]], i64 0
-; CHECK-NEXT: [[TMP49:%.*]] = insertelement <4 x float> [[X2]], float [[TMP48]], i64 0
-; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP21]], [[TMP30]]
-; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP49]], [[RES3]]
-; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %1 = fneg <4 x float> %x2
- %2 = extractelement <4 x float> %x0, i64 0
- %3 = extractelement <4 x float> %x1, i64 0
- %4 = extractelement <4 x float> %1, i64 0
- %5 = call float @llvm.fma.f32(float %2, float %3, float %4)
- %6 = extractelement <4 x float> %x2, i64 0
- %7 = bitcast i8 %x3 to <8 x i1>
- %8 = extractelement <8 x i1> %7, i64 0
- %9 = select i1 %8, float %5, float %6
- %10 = insertelement <4 x float> %x2, float %9, i64 0
- %11 = fneg <4 x float> %x2
- %12 = extractelement <4 x float> %x0, i64 0
- %13 = extractelement <4 x float> %x1, i64 0
- %14 = extractelement <4 x float> %11, i64 0
- %15 = call float @llvm.x86.avx512.vfmadd.f32(float %12, float %13, float %14, i32 11)
- %16 = extractelement <4 x float> %x2, i64 0
- %17 = insertelement <4 x float> %x2, float %15, i64 0
- %18 = fneg <4 x float> %x2
- %19 = extractelement <4 x float> %x0, i64 0
- %20 = extractelement <4 x float> %x1, i64 0
- %21 = extractelement <4 x float> %18, i64 0
- %22 = call float @llvm.x86.avx512.vfmadd.f32(float %19, float %20, float %21, i32 10)
- %23 = extractelement <4 x float> %x2, i64 0
- %24 = bitcast i8 %x3 to <8 x i1>
- %25 = extractelement <8 x i1> %24, i64 0
- %26 = select i1 %25, float %22, float %23
- %27 = insertelement <4 x float> %x2, float %26, i64 0
- %res3 = fadd <4 x float> %10, %17
- %res4 = fadd <4 x float> %27, %res3
- ret <4 x float> %res4
-}
-
-define <2 x double> @test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_sd(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = fneg <2 x double> [[X0:%.*]]
-; CHECK-NEXT: [[TMP6:%.*]] = fneg <2 x double> [[X2:%.*]]
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP5]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x double> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x double> [[TMP6]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i64 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i64 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP10:%.*]] = call double @llvm.fma.f64(double [[TMP7]], double [[TMP8]], double [[TMP9]])
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[_MSPROP4]], i64 [[_MSPROP5]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast double [[TMP10]] to i64
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast double [[TMP11]] to i64
-; CHECK-NEXT: [[TMP18:%.*]] = xor i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP18]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP19]], [[_MSPROP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i64 [[TMP20]], i64 [[TMP15]]
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], double [[TMP10]], double [[TMP11]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <2 x double> [[X2]], double [[TMP21]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = fneg <2 x double> [[X0]]
-; CHECK-NEXT: [[TMP24:%.*]] = fneg <2 x double> [[X2]]
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <2 x double> [[TMP23]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <2 x double> [[TMP24]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i64 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i64 [[_MSPROP10]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
-; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
-; CHECK: 28:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 29:
-; CHECK-NEXT: [[TMP30:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP25]], double [[TMP26]], double [[TMP27]], i32 11)
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <2 x i64> [[TMP2]], i64 0, i64 0
-; CHECK-NEXT: [[TMP32:%.*]] = insertelement <2 x double> [[X2]], double [[TMP30]], i64 0
-; CHECK-NEXT: [[TMP33:%.*]] = fneg <2 x double> [[X0]]
-; CHECK-NEXT: [[TMP34:%.*]] = fneg <2 x double> [[X2]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <2 x i64> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <2 x double> [[TMP33]], i64 0
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <2 x double> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <2 x double> [[TMP34]], i64 0
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i64 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i64 [[_MSPROP14]], 0
-; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i64 [[_MSPROP15]], 0
-; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
-; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
-; CHECK: 38:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 39:
-; CHECK-NEXT: [[TMP40:%.*]] = call double @llvm.x86.avx512.vfmadd.f64(double [[TMP35]], double [[TMP36]], double [[TMP37]], i32 10)
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <2 x double> [[X2]], i64 0
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i64 0, i64 [[_MSPROP16]]
-; CHECK-NEXT: [[TMP46:%.*]] = bitcast double [[TMP40]] to i64
-; CHECK-NEXT: [[TMP47:%.*]] = bitcast double [[TMP41]] to i64
-; CHECK-NEXT: [[TMP48:%.*]] = xor i64 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = or i64 [[TMP48]], 0
-; CHECK-NEXT: [[TMP50:%.*]] = or i64 [[TMP49]], [[_MSPROP16]]
-; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i64 [[TMP50]], i64 [[TMP45]]
-; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], double [[TMP40]], double [[TMP41]]
-; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <2 x i64> [[TMP2]], i64 [[_MSPROP_SELECT18]], i64 0
-; CHECK-NEXT: [[TMP52:%.*]] = insertelement <2 x double> [[X2]], double [[TMP51]], i64 0
-; CHECK-NEXT: [[_MSPROP20:%.*]] = or <2 x i64> [[_MSPROP7]], [[_MSPROP12]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <2 x double> [[TMP22]], [[TMP32]]
-; CHECK-NEXT: [[_MSPROP21:%.*]] = or <2 x i64> [[_MSPROP19]], [[_MSPROP20]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <2 x double> [[TMP52]], [[RES3]]
-; CHECK-NEXT: store <2 x i64> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <2 x double> [[RES4]]
-;
- %1 = fneg <2 x double> %x0
- %2 = fneg <2 x double> %x2
- %3 = extractelement <2 x double> %1, i64 0
- %4 = extractelement <2 x double> %x1, i64 0
- %5 = extractelement <2 x double> %2, i64 0
- %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
- %7 = extractelement <2 x double> %x2, i64 0
- %8 = bitcast i8 %x3 to <8 x i1>
- %9 = extractelement <8 x i1> %8, i64 0
- %10 = select i1 %9, double %6, double %7
- %11 = insertelement <2 x double> %x2, double %10, i64 0
- %12 = fneg <2 x double> %x0
- %13 = fneg <2 x double> %x2
- %14 = extractelement <2 x double> %12, i64 0
- %15 = extractelement <2 x double> %x1, i64 0
- %16 = extractelement <2 x double> %13, i64 0
- %17 = call double @llvm.x86.avx512.vfmadd.f64(double %14, double %15, double %16, i32 11)
- %18 = extractelement <2 x double> %x2, i64 0
- %19 = insertelement <2 x double> %x2, double %17, i64 0
- %20 = fneg <2 x double> %x0
- %21 = fneg <2 x double> %x2
- %22 = extractelement <2 x double> %20, i64 0
- %23 = extractelement <2 x double> %x1, i64 0
- %24 = extractelement <2 x double> %21, i64 0
- %25 = call double @llvm.x86.avx512.vfmadd.f64(double %22, double %23, double %24, i32 10)
- %26 = extractelement <2 x double> %x2, i64 0
- %27 = bitcast i8 %x3 to <8 x i1>
- %28 = extractelement <8 x i1> %27, i64 0
- %29 = select i1 %28, double %25, double %26
- %30 = insertelement <2 x double> %x2, double %29, i64 0
- %res3 = fadd <2 x double> %11, %19
- %res4 = fadd <2 x double> %30, %res3
- ret <2 x double> %res4
-}
-
-define <4 x float> @test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfnmsub_ss(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = fneg <4 x float> [[X0:%.*]]
-; CHECK-NEXT: [[TMP6:%.*]] = fneg <4 x float> [[X2:%.*]]
-; CHECK-NEXT: [[_MSPROP:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[TMP5]], i64 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[TMP6]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = or i32 [[_MSPROP]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP3]], [[_MSPROP2]]
-; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.fma.f32(float [[TMP7]], float [[TMP8]], float [[TMP9]])
-; CHECK-NEXT: [[_MSPROP5:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP12]], i64 0
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i32 [[_MSPROP4]], i32 [[_MSPROP5]]
-; CHECK-NEXT: [[TMP16:%.*]] = bitcast float [[TMP10]] to i32
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast float [[TMP11]] to i32
-; CHECK-NEXT: [[TMP18:%.*]] = xor i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP18]], [[_MSPROP4]]
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP19]], [[_MSPROP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP20]], i32 [[TMP15]]
-; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP14]], float [[TMP10]], float [[TMP11]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x float> [[X2]], float [[TMP21]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = fneg <4 x float> [[X0]]
-; CHECK-NEXT: [[TMP24:%.*]] = fneg <4 x float> [[X2]]
-; CHECK-NEXT: [[_MSPROP8:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x float> [[TMP23]], i64 0
-; CHECK-NEXT: [[_MSPROP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x float> [[TMP24]], i64 0
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[_MSPROP8]], 0
-; CHECK-NEXT: [[_MSCMP22:%.*]] = icmp ne i32 [[_MSPROP9]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP22]]
-; CHECK-NEXT: [[_MSCMP23:%.*]] = icmp ne i32 [[_MSPROP10]], 0
-; CHECK-NEXT: [[_MSOR24:%.*]] = or i1 [[_MSOR]], [[_MSCMP23]]
-; CHECK-NEXT: br i1 [[_MSOR24]], label [[TMP28:%.*]], label [[TMP29:%.*]], !prof [[PROF1]]
-; CHECK: 28:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 29:
-; CHECK-NEXT: [[TMP30:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP25]], float [[TMP26]], float [[TMP27]], i32 11)
-; CHECK-NEXT: [[_MSPROP11:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[_MSPROP12:%.*]] = insertelement <4 x i32> [[TMP2]], i32 0, i64 0
-; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x float> [[X2]], float [[TMP30]], i64 0
-; CHECK-NEXT: [[TMP33:%.*]] = fneg <4 x float> [[X0]]
-; CHECK-NEXT: [[TMP34:%.*]] = fneg <4 x float> [[X2]]
-; CHECK-NEXT: [[_MSPROP13:%.*]] = extractelement <4 x i32> [[TMP1]], i64 0
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <4 x float> [[TMP33]], i64 0
-; CHECK-NEXT: [[_MSPROP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <4 x float> [[X1]], i64 0
-; CHECK-NEXT: [[_MSPROP15:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x float> [[TMP34]], i64 0
-; CHECK-NEXT: [[_MSCMP25:%.*]] = icmp ne i32 [[_MSPROP13]], 0
-; CHECK-NEXT: [[_MSCMP26:%.*]] = icmp ne i32 [[_MSPROP14]], 0
-; CHECK-NEXT: [[_MSOR27:%.*]] = or i1 [[_MSCMP25]], [[_MSCMP26]]
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[_MSPROP15]], 0
-; CHECK-NEXT: [[_MSOR29:%.*]] = or i1 [[_MSOR27]], [[_MSCMP28]]
-; CHECK-NEXT: br i1 [[_MSOR29]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
-; CHECK: 38:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 39:
-; CHECK-NEXT: [[TMP40:%.*]] = call float @llvm.x86.avx512.vfmadd.f32(float [[TMP35]], float [[TMP36]], float [[TMP37]], i32 10)
-; CHECK-NEXT: [[_MSPROP16:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <4 x float> [[X2]], i64 0
-; CHECK-NEXT: [[TMP42:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP43:%.*]] = bitcast i8 [[X3]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP17:%.*]] = extractelement <8 x i1> [[TMP42]], i64 0
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <8 x i1> [[TMP43]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = select i1 [[TMP44]], i32 0, i32 [[_MSPROP16]]
-; CHECK-NEXT: [[TMP46:%.*]] = bitcast float [[TMP40]] to i32
-; CHECK-NEXT: [[TMP47:%.*]] = bitcast float [[TMP41]] to i32
-; CHECK-NEXT: [[TMP48:%.*]] = xor i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP48]], 0
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[_MSPROP16]]
-; CHECK-NEXT: [[_MSPROP_SELECT18:%.*]] = select i1 [[_MSPROP17]], i32 [[TMP50]], i32 [[TMP45]]
-; CHECK-NEXT: [[TMP51:%.*]] = select i1 [[TMP44]], float [[TMP40]], float [[TMP41]]
-; CHECK-NEXT: [[_MSPROP19:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT18]], i64 0
-; CHECK-NEXT: [[TMP52:%.*]] = insertelement <4 x float> [[X2]], float [[TMP51]], i64 0
-; CHECK-NEXT: [[_MSPROP20:%.*]] = or <4 x i32> [[_MSPROP7]], [[_MSPROP12]]
-; CHECK-NEXT: [[RES3:%.*]] = fadd <4 x float> [[TMP22]], [[TMP32]]
-; CHECK-NEXT: [[_MSPROP21:%.*]] = or <4 x i32> [[_MSPROP19]], [[_MSPROP20]]
-; CHECK-NEXT: [[RES4:%.*]] = fadd <4 x float> [[TMP52]], [[RES3]]
-; CHECK-NEXT: store <4 x i32> [[_MSPROP21]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[RES4]]
-;
- %1 = fneg <4 x float> %x0
- %2 = fneg <4 x float> %x2
- %3 = extractelement <4 x float> %1, i64 0
- %4 = extractelement <4 x float> %x1, i64 0
- %5 = extractelement <4 x float> %2, i64 0
- %6 = call float @llvm.fma.f32(float %3, float %4, float %5)
- %7 = extractelement <4 x float> %x2, i64 0
- %8 = bitcast i8 %x3 to <8 x i1>
- %9 = extractelement <8 x i1> %8, i64 0
- %10 = select i1 %9, float %6, float %7
- %11 = insertelement <4 x float> %x2, float %10, i64 0
- %12 = fneg <4 x float> %x0
- %13 = fneg <4 x float> %x2
- %14 = extractelement <4 x float> %12, i64 0
- %15 = extractelement <4 x float> %x1, i64 0
- %16 = extractelement <4 x float> %13, i64 0
- %17 = call float @llvm.x86.avx512.vfmadd.f32(float %14, float %15, float %16, i32 11)
- %18 = extractelement <4 x float> %x2, i64 0
- %19 = insertelement <4 x float> %x2, float %17, i64 0
- %20 = fneg <4 x float> %x0
- %21 = fneg <4 x float> %x2
- %22 = extractelement <4 x float> %20, i64 0
- %23 = extractelement <4 x float> %x1, i64 0
- %24 = extractelement <4 x float> %21, i64 0
- %25 = call float @llvm.x86.avx512.vfmadd.f32(float %22, float %23, float %24, i32 10)
- %26 = extractelement <4 x float> %x2, i64 0
- %27 = bitcast i8 %x3 to <8 x i1>
- %28 = extractelement <8 x i1> %27, i64 0
- %29 = select i1 %28, float %25, float %26
- %30 = insertelement <4 x float> %x2, float %29, i64 0
- %res3 = fadd <4 x float> %11, %19
- %res4 = fadd <4 x float> %30, %res3
- ret <4 x float> %res4
-}
-
-define <4 x float> @test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask3_vfmadd_ss_rm(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
-; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
-; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
-; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP3]]
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP12]] to i32
-; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
-; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP12]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP3]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X1]], float [[TMP23]], i64 0
-; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[TMP24]]
-;
- %q = load float, ptr %ptr_b
- %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %vecinit.i, i64 0
- %3 = extractelement <4 x float> %x1, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float %3
- %8 = insertelement <4 x float> %x1, float %7, i64 0
- ret <4 x float> %8
-}
-
-define <4 x float> @test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_mask_vfmadd_ss_rm(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 40) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[PTR_B]] to i64
-; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
-; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP9]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
-; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
-; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.fma.f32(float [[TMP10]], float [[TMP11]], float [[TMP12]])
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
-; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
-; CHECK-NEXT: [[_MSPROP6:%.*]] = extractelement <8 x i1> [[TMP14]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i1> [[TMP15]], i64 0
-; CHECK-NEXT: [[TMP17:%.*]] = select i1 [[TMP16]], i32 [[_MSPROP5]], i32 [[_MSPROP1]]
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast float [[TMP13]] to i32
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast float [[TMP10]] to i32
-; CHECK-NEXT: [[TMP20:%.*]] = xor i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP20]], [[_MSPROP5]]
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP21]], [[_MSPROP1]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 [[_MSPROP6]], i32 [[TMP22]], i32 [[TMP17]]
-; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP16]], float [[TMP13]], float [[TMP10]]
-; CHECK-NEXT: [[_MSPROP7:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[X0]], float [[TMP23]], i64 0
-; CHECK-NEXT: store <4 x i32> [[_MSPROP7]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[TMP24]]
-;
- %q = load float, ptr %ptr_b
- %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %vecinit.i, i64 0
- %3 = extractelement <4 x float> %x1, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = bitcast i8 %x3 to <8 x i1>
- %6 = extractelement <8 x i1> %5, i64 0
- %7 = select i1 %6, float %4, float %1
- %8 = insertelement <4 x float> %x0, float %7, i64 0
- ret <4 x float> %8
-}
-
-
-define <4 x float> @test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, ptr %ptr_b, i8 %x3, i32 %x4) #0 {
-; CHECK-LABEL: @test_int_x86_avx512_maskz_vfmadd_ss_rm(
-; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 5:
-; CHECK-NEXT: [[Q:%.*]] = load float, ptr [[PTR_B:%.*]], align 4
-; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[PTR_B]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
-; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
-; CHECK-NEXT: [[_MSLD:%.*]] = load i32, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[_MSPROP:%.*]] = insertelement <4 x i32> splat (i32 -1), i32 [[_MSLD]], i32 0
-; CHECK-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x float> undef, float [[Q]], i32 0
-; CHECK-NEXT: [[_MSPROP1:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x float> [[X0:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP2:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x float> [[X1:%.*]], i64 0
-; CHECK-NEXT: [[_MSPROP3:%.*]] = extractelement <4 x i32> [[_MSPROP]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[VECINIT_I]], i64 0
-; CHECK-NEXT: [[_MSPROP4:%.*]] = or i32 [[_MSPROP1]], [[_MSPROP2]]
-; CHECK-NEXT: [[_MSPROP5:%.*]] = or i32 [[_MSPROP4]], [[_MSPROP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = call float @llvm.fma.f32(float [[TMP9]], float [[TMP10]], float [[TMP11]])
-; CHECK-NEXT: [[TMP13:%.*]] = select i1 false, i32 [[_MSPROP5]], i32 0
-; CHECK-NEXT: [[TMP14:%.*]] = bitcast float [[TMP12]] to i32
-; CHECK-NEXT: [[TMP15:%.*]] = xor i32 [[TMP14]], 0
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP15]], [[_MSPROP5]]
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP16]], 0
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select i1 false, i32 [[TMP17]], i32 [[TMP13]]
-; CHECK-NEXT: [[TMP18:%.*]] = select i1 false, float [[TMP12]], float 0.000000e+00
-; CHECK-NEXT: [[_MSPROP6:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[_MSPROP_SELECT]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x float> [[X0]], float [[TMP18]], i64 0
-; CHECK-NEXT: store <4 x i32> [[_MSPROP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <4 x float> [[TMP19]]
-;
- %q = load float, ptr %ptr_b
- %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
- %1 = extractelement <4 x float> %x0, i64 0
- %2 = extractelement <4 x float> %x1, i64 0
- %3 = extractelement <4 x float> %vecinit.i, i64 0
- %4 = call float @llvm.fma.f32(float %1, float %2, float %3)
- %5 = select i1 false, float %4, float 0.000000e+00
- %6 = insertelement <4 x float> %x0, float %5, i64 0
- ret <4 x float> %6
-}
-
-define <16 x i32> @test_x86_avx512_psll_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psll_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psll_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_psll_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psll_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i64> @test_x86_avx512_psll_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psll_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psll_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_psll_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psll_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) nounwind readnone
-
-
-define <16 x i32> @test_x86_avx512_pslli_d_512(<16 x i32> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_pslli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_pslli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_pslli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_pslli_d_512(<16 x i32> %a0, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_pslli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) nounwind readnone
-
-
-define <8 x i64> @test_x86_avx512_pslli_q_512(<8 x i64> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_pslli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_pslli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_pslli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_pslli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) nounwind readnone
-
-
-define <8 x i64> @test_x86_avx512_psra_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psra_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psra_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_psra_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psra_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) nounwind readnone
-
-
-define <16 x i32> @test_x86_avx512_psra_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psra_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psra_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_psra_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psra_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) nounwind readnone
-
-
-
-define <8 x i64> @test_x86_avx512_psrai_q_512(<8 x i64> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrai_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_psrai_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrai_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_psrai_q_512(<8 x i64> %a0, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrai_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) nounwind readnone
-
-
-define <16 x i32> @test_x86_avx512_psrai_d_512(<16 x i32> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrai_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_psrai_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrai_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_psrai_d_512(<16 x i32> %a0, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrai_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) nounwind readnone
-
-
-
-define <16 x i32> @test_x86_avx512_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrl_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <16 x i32>
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrl_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP11]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP12]], <16 x i32> [[TMP16]], <16 x i32> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_psrl_d_512(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrl_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <16 x i32>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[TMP1]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> [[A0:%.*]], <4 x i32> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP10]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[TMP15]], <16 x i32> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %a0, <4 x i32> %a1) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) nounwind readnone
-
-
-define <8 x i64> @test_x86_avx512_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrl_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i128 [[TMP3]] to i64
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne i64 [[TMP4]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = sext i1 [[TMP5]] to i512
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast i512 [[TMP6]] to <8 x i64>
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP9]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrl_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i128 [[TMP5]] to i64
-; CHECK-NEXT: [[TMP7:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: [[TMP8:%.*]] = sext i1 [[TMP7]] to i512
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i512 [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP9]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP11]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP14:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], [[TMP11]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <8 x i64> [[TMP15]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP12]], <8 x i64> [[TMP16]], <8 x i64> [[TMP13]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_psrl_q_512(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrl_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 80) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x i64> [[TMP2]] to i128
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i128 [[TMP4]] to i64
-; CHECK-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = sext i1 [[TMP6]] to i512
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i512 [[TMP7]] to <8 x i64>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[TMP1]], <2 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP8]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> [[A0:%.*]], <2 x i64> [[A1]])
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP10]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP13:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <8 x i64> [[TMP13]], [[TMP10]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <8 x i64> [[TMP14]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP11]], <8 x i64> [[TMP15]], <8 x i64> [[TMP12]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %a0, <2 x i64> %a1) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) nounwind readnone
-
-
-define <16 x i32> @test_x86_avx512_psrli_d_512(<16 x i32> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <16 x i32> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- ret <16 x i32> %res
-}
-define <16 x i32> @test_x86_avx512_mask_psrli_d_512(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <16 x i32> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP5]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <16 x i32> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <16 x i32> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP6]], <16 x i32> [[TMP10]], <16 x i32> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[PASSTHRU]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %passthru
- ret <16 x i32> %res2
-}
-define <16 x i32> @test_x86_avx512_maskz_psrli_d_512(<16 x i32> %a0, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrli_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i16 [[TMP2]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP4]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP5]], <16 x i32> [[TMP9]], <16 x i32> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %a0, i32 7) ; <<16 x i32>> [#uses=1]
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) nounwind readnone
-
-
-define <8 x i64> @test_x86_avx512_psrli_q_512(<8 x i64> %a0) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP3:%.*]] = or <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: store <8 x i64> [[TMP3]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- ret <8 x i64> %res
-}
-define <8 x i64> @test_x86_avx512_mask_psrli_q_512(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i64> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP7:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP5]], <8 x i64> [[TMP3]]
-; CHECK-NEXT: [[TMP8:%.*]] = xor <8 x i64> [[RES]], [[PASSTHRU:%.*]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP5]]
-; CHECK-NEXT: [[TMP10:%.*]] = or <8 x i64> [[TMP9]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP6]], <8 x i64> [[TMP10]], <8 x i64> [[TMP7]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[PASSTHRU]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %passthru
- ret <8 x i64> %res2
-}
-define <8 x i64> @test_x86_avx512_maskz_psrli_q_512(<8 x i64> %a0, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrli_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[TMP1]], i32 7)
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> [[A0:%.*]], i32 7)
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[TMP2]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP6:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP4]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP7:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP4]]
-; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP5]], <8 x i64> [[TMP9]], <8 x i64> [[TMP6]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %a0, i32 7) ; <<8 x i64>> [#uses=1]
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) nounwind readnone
-
-define <16 x i32> @test_x86_avx512_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psllv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_psllv_d_512_const() #0 {
-; CHECK-LABEL: @test_x86_avx512_psllv_d_512_const(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
-; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
-; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
- %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
- %res2 = add <16 x i32> %res0, %res1
- ret <16 x i32> %res2
-}
-
-define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psllv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
- ret <16 x i32> %res2
-}
-
-define <16 x i32> @test_x86_avx512_maskz_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psllv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psllv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_psllv_q_512_const() #0 {
-; CHECK-LABEL: @test_x86_avx512_psllv_q_512_const(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
-; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
- %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
- %res2 = add <8 x i64> %res0, %res1
- ret <8 x i64> %res2
-}
-
-define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psllv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
- ret <8 x i64> %res2
-}
-
-define <8 x i64> @test_x86_avx512_maskz_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psllv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
-; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-
-declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
-
-define <16 x i32> @test_x86_avx512_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrav_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_mask_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrav_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
- ret <16 x i32> %res2
-}
-
-define <16 x i32> @test_x86_avx512_maskz_psrav_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrav_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrav_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_mask_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrav_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
- ret <8 x i64> %res2
-}
-
-define <8 x i64> @test_x86_avx512_maskz_psrav_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrav_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
-; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-
-declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) nounwind readnone
-
-define <16 x i32> @test_x86_avx512_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrlv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <16 x i1> [[TMP3]] to <16 x i32>
-; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <16 x i32> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: store <16 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- ret <16 x i32> %res
-}
-
-define <16 x i32> @test_x86_avx512_psrlv_d_512_const() #0 {
-; CHECK-LABEL: @test_x86_avx512_psrlv_d_512_const(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
-; CHECK-NEXT: [[TMP2:%.*]] = or <16 x i32> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[RES0:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
-; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> zeroinitializer, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
-; CHECK-NEXT: [[TMP4:%.*]] = or <16 x i32> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES1:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[RES2:%.*]] = add <16 x i32> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 2, i32 9, i32 0, i32 -1, i32 3, i32 7, i32 -1, i32 0, i32 4, i32 5, i32 -2, i32 0, i32 5, i32 3, i32 -3, i32 0>, <16 x i32> <i32 1, i32 0, i32 33, i32 -1, i32 2, i32 0, i32 34, i32 -2, i32 3, i32 0, i32 35, i32 -1, i32 4, i32 0, i32 36, i32 -3>)
- %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 -1>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 -1>)
- %res2 = add <16 x i32> %res0, %res1
- ret <16 x i32> %res2
-}
-
-define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrlv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <16 x i1> [[TMP5]] to <16 x i32>
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP8]], <16 x i32> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <16 x i32> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <16 x i32> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP13]], <16 x i32> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> [[A2]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> %a2
- ret <16 x i32> %res2
-}
-
-define <16 x i32> @test_x86_avx512_maskz_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_d_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <16 x i32> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <16 x i1> [[TMP4]] to <16 x i32>
-; CHECK-NEXT: [[TMP6:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[TMP1]], <16 x i32> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <16 x i32> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> [[A0:%.*]], <16 x i32> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[TMP7]], <16 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <16 x i32> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <16 x i32> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <16 x i32> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP8]], <16 x i32> [[TMP12]], <16 x i32> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <16 x i1> [[MASK_CAST]], <16 x i32> [[RES]], <16 x i32> zeroinitializer
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x i32> [[RES2]]
-;
- %res = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %a0, <16 x i32> %a1)
- %mask.cast = bitcast i16 %mask to <16 x i1>
- %res2 = select <16 x i1> %mask.cast, <16 x i32> %res, <16 x i32> zeroinitializer
- ret <16 x i32> %res2
-}
-
-declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) nounwind readnone
-
-define <8 x i64> @test_x86_avx512_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1) #0 {
-; CHECK-LABEL: @test_x86_avx512_psrlv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP4:%.*]] = sext <8 x i1> [[TMP3]] to <8 x i64>
-; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP6:%.*]] = or <8 x i64> [[TMP5]], [[TMP4]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: store <8 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- ret <8 x i64> %res
-}
-
-define <8 x i64> @test_x86_avx512_psrlv_q_512_const() #0 {
-; CHECK-LABEL: @test_x86_avx512_psrlv_q_512_const(
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
-; CHECK-NEXT: [[TMP2:%.*]] = or <8 x i64> [[TMP1]], zeroinitializer
-; CHECK-NEXT: [[RES0:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
-; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> zeroinitializer, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
-; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i64> [[TMP3]], zeroinitializer
-; CHECK-NEXT: [[RES1:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
-; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP2]], [[TMP4]]
-; CHECK-NEXT: [[RES2:%.*]] = add <8 x i64> [[RES0]], [[RES1]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 2, i64 9, i64 0, i64 -1, i64 3, i64 7, i64 -1, i64 0>, <8 x i64> <i64 1, i64 0, i64 33, i64 -1, i64 2, i64 0, i64 34, i64 -2>)
- %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> <i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 4, i64 -1>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 -1>)
- %res2 = add <8 x i64> %res0, %res1
- ret <8 x i64> %res2
-}
-
-define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_mask_psrlv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i1> [[TMP5]] to <8 x i64>
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP7]], [[TMP6]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP10:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP8]], <8 x i64> [[TMP4]]
-; CHECK-NEXT: [[TMP11:%.*]] = xor <8 x i64> [[RES]], [[A2:%.*]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = or <8 x i64> [[TMP12]], [[TMP4]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP9]], <8 x i64> [[TMP13]], <8 x i64> [[TMP10]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> [[A2]]
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> %a2
- ret <8 x i64> %res2
-}
-
-define <8 x i64> @test_x86_avx512_maskz_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) #0 {
-; CHECK-LABEL: @test_x86_avx512_maskz_psrlv_q_512(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <8 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i1> [[TMP4]] to <8 x i64>
-; CHECK-NEXT: [[TMP6:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[TMP1]], <8 x i64> [[A1:%.*]])
-; CHECK-NEXT: [[TMP7:%.*]] = or <8 x i64> [[TMP6]], [[TMP5]]
-; CHECK-NEXT: [[RES:%.*]] = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> [[A0:%.*]], <8 x i64> [[A1]])
-; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
-; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT: [[TMP9:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[TMP7]], <8 x i64> zeroinitializer
-; CHECK-NEXT: [[TMP10:%.*]] = xor <8 x i64> [[RES]], zeroinitializer
-; CHECK-NEXT: [[TMP11:%.*]] = or <8 x i64> [[TMP10]], [[TMP7]]
-; CHECK-NEXT: [[TMP12:%.*]] = or <8 x i64> [[TMP11]], zeroinitializer
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <8 x i1> [[TMP8]], <8 x i64> [[TMP12]], <8 x i64> [[TMP9]]
-; CHECK-NEXT: [[RES2:%.*]] = select <8 x i1> [[MASK_CAST]], <8 x i64> [[RES]], <8 x i64> zeroinitializer
-; CHECK-NEXT: store <8 x i64> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES2]]
-;
- %res = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %a0, <8 x i64> %a1)
- %mask.cast = bitcast i8 %mask to <8 x i1>
- %res2 = select <8 x i1> %mask.cast, <8 x i64> %res, <8 x i64> zeroinitializer
- ret <8 x i64> %res2
-}
-
-declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) nounwind readnone
-
-
-define <8 x double> @test_mm256_castpd128_pd256_freeze(<2 x double> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm256_castpd128_pd256_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <2 x double> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %a1 = freeze <2 x double> poison
- %res = shufflevector <2 x double> %a0, <2 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
- ret <8 x double> %res
-}
-
-
-define <8 x double> @test_mm256_castpd256_pd256_freeze(<4 x double> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm256_castpd256_pd256_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <4 x double> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x double> [[RES]]
-;
- %a1 = freeze <4 x double> poison
- %res = shufflevector <4 x double> %a0, <4 x double> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x double> %res
-}
-
-
-define <16 x float> @test_mm256_castps128_ps512_freeze(<4 x float> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm256_castps128_ps512_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <4 x float> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %a1 = freeze <4 x float> poison
- %res = shufflevector <4 x float> %a0, <4 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
- ret <16 x float> %res
-}
-
-
-define <16 x float> @test_mm256_castps256_ps512_freeze(<8 x float> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm256_castps256_ps512_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <8 x float> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> [[A1]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: store <16 x i32> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[RES]]
-;
- %a1 = freeze <8 x float> poison
- %res = shufflevector <8 x float> %a0, <8 x float> %a1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- ret <16 x float> %res
-}
-
-
-define <8 x i64> @test_mm512_castsi128_si512_freeze(<2 x i64> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm512_castsi128_si512_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <2 x i64> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <2 x i64> [[TMP1]], <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <2 x i64> [[A0:%.*]], <2 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %a1 = freeze <2 x i64> poison
- %res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
- ret <8 x i64> %res
-}
-
-
-define <8 x i64> @test_mm512_castsi256_si512_pd256_freeze(<4 x i64> %a0) nounwind #0 {
-; CHECK-LABEL: @test_mm512_castsi256_si512_pd256_freeze(
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[A1:%.*]] = freeze <4 x i64> poison
-; CHECK-NEXT: [[_MSPROP:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[RES:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> [[A1]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: store <8 x i64> [[_MSPROP]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <8 x i64> [[RES]]
-;
- %a1 = freeze <4 x i64> poison
- %res = shufflevector <4 x i64> %a0, <4 x i64> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- ret <8 x i64> %res
-}
-
-
-define <16 x float> @bad_mask_transition(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 {
-; CHECK-LABEL: @bad_mask_transition(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
-; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
-; CHECK-NEXT: [[TMP5:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP7]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
-; CHECK: 8:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 9:
-; CHECK-NEXT: [[TMP10:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
-; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i1> [[TMP10]] to i8
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i512 [[TMP12]], 0
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i64> [[TMP3]] to i512
-; CHECK-NEXT: [[_MSCMP3:%.*]] = icmp ne i512 [[TMP13]], 0
-; CHECK-NEXT: [[_MSOR4:%.*]] = or i1 [[_MSCMP2]], [[_MSCMP3]]
-; CHECK-NEXT: br i1 [[_MSOR4]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
-; CHECK: 14:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 15:
-; CHECK-NEXT: [[TMP16:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[C:%.*]], <8 x double> [[D:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
-; CHECK-NEXT: [[TMP17:%.*]] = bitcast <8 x i1> [[TMP16]] to i8
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP11]] to i16
-; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP17]] to i16
-; CHECK-NEXT: [[TMP18:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
-; CHECK-NEXT: [[TMP19:%.*]] = bitcast i16 [[CONV2]] to <16 x i1>
-; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <16 x i1> [[TMP18]], <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP21:%.*]] = shufflevector <16 x i1> [[TMP19]], <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP22:%.*]] = shufflevector <8 x i1> [[TMP20]], <8 x i1> [[TMP21]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT: [[TMP23:%.*]] = select <16 x i1> [[TMP22]], <16 x i32> [[TMP4]], <16 x i32> [[TMP5]]
-; CHECK-NEXT: [[TMP24:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP25:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP26:%.*]] = xor <16 x i32> [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = or <16 x i32> [[TMP26]], [[TMP4]]
-; CHECK-NEXT: [[TMP28:%.*]] = or <16 x i32> [[TMP27]], [[TMP5]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP28]], <16 x i32> [[TMP23]]
-; CHECK-NEXT: [[TMP29:%.*]] = select <16 x i1> [[TMP22]], <16 x float> [[F]], <16 x float> [[E]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP29]]
-;
-entry:
- %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
- %1 = bitcast <8 x i1> %0 to i8
- %2 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %c, <8 x double> %d, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
- %3 = bitcast <8 x i1> %2 to i8
- %conv = zext i8 %1 to i16
- %conv2 = zext i8 %3 to i16
- %4 = bitcast i16 %conv to <16 x i1>
- %5 = bitcast i16 %conv2 to <16 x i1>
- %6 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %7 = shufflevector <16 x i1> %5, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
- %8 = shufflevector <8 x i1> %6, <8 x i1> %7, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
- %9 = select <16 x i1> %8, <16 x float> %f, <16 x float> %e
- ret <16 x float> %9
-}
-
-define <16 x float> @bad_mask_transition_2(<8 x double> %a, <8 x double> %b, <8 x double> %c, <8 x double> %d, <16 x float> %e, <16 x float> %f) #0 {
-; CHECK-LABEL: @bad_mask_transition_2(
-; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 320) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 256) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP0]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 17, <8 x i1> splat (i1 true), i32 4)
-; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i1> [[TMP8]] to i8
-; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP9]] to i16
-; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[CONV]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP2]], <16 x i32> [[TMP3]]
-; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[F:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[E:%.*]] to <16 x i32>
-; CHECK-NEXT: [[TMP14:%.*]] = xor <16 x i32> [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], [[TMP2]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[TMP3]]
-; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> zeroinitializer, <16 x i32> [[TMP16]], <16 x i32> [[TMP11]]
-; CHECK-NEXT: [[TMP17:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[F]], <16 x float> [[E]]
-; CHECK-NEXT: store <16 x i32> [[_MSPROP_SELECT]], ptr @__msan_retval_tls, align 8
-; CHECK-NEXT: ret <16 x float> [[TMP17]]
-;
-entry:
- %0 = call <8 x i1> @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 17, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, i32 4)
- %1 = bitcast <8 x i1> %0 to i8
- %conv = zext i8 %1 to i16
- %2 = bitcast i16 %conv to <16 x i1>
- %3 = select <16 x i1> %2, <16 x float> %f, <16 x float> %e
- ret <16 x float> %3
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.compress.v8f64(<8 x double>, <8 x double>, <8 x i1>)
-declare <16 x float> @llvm.x86.avx512.mask.compress.v16f32(<16 x float>, <16 x float>, <16 x i1>)
-declare <8 x i64> @llvm.x86.avx512.mask.compress.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
-declare <16 x i32> @llvm.x86.avx512.mask.compress.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
-declare <8 x double> @llvm.x86.avx512.mask.expand.v8f64(<8 x double>, <8 x double>, <8 x i1>)
-declare <16 x float> @llvm.x86.avx512.mask.expand.v16f32(<16 x float>, <16 x float>, <16 x i1>)
-declare <8 x i64> @llvm.x86.avx512.mask.expand.v8i64(<8 x i64>, <8 x i64>, <8 x i1>)
-declare <16 x i32> @llvm.x86.avx512.mask.expand.v16i32(<16 x i32>, <16 x i32>, <16 x i1>)
-
-attributes #0 = { sanitize_memory }
>From 56c0267f91967d6db9f9e9f5f93ad63343464230 Mon Sep 17 00:00:00 2001
From: Thurston Dang <thurston at google.com>
Date: Mon, 27 Jan 2025 01:01:38 +0000
Subject: [PATCH 5/5] Address reviewer comments and rebase
---
.../Instrumentation/MemorySanitizer.cpp | 14 +-
.../MemorySanitizer/X86/avx512-intrinsics.ll | 212 +++++-------------
2 files changed, 58 insertions(+), 168 deletions(-)
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index bdd944aaa6360a..491d37799c2d50 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2997,8 +2997,9 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
/// fine).
///
/// Caller guarantees that this intrinsic does not access memory.
- bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
- unsigned int trailingFlags) {
+ [[maybe_unused]] bool
+ maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I,
+ unsigned int trailingFlags) {
Type *RetTy = I.getType();
if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy()))
return false;
@@ -3049,7 +3050,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
}
if (I.doesNotAccessMemory())
- if (maybeHandleSimpleNomemIntrinsic(I, /* trailingFlags */ 0))
+ if (maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/0))
return true;
// FIXME: detect and handle SSE maskstore/maskload?
@@ -4621,6 +4622,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
case Intrinsic::x86_avx2_maskload_d_256:
case Intrinsic::x86_avx2_maskload_q_256: {
handleAVXMaskedLoad(I);
+ break;
+ }
// Packed
case Intrinsic::x86_avx512_min_ps_512:
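(Aside: the `+ break;` added in the hunk above terminates the maskload case so it no longer falls through into the min/max cases that follow. A minimal standalone C++ illustration of the hazard being closed off; the enum and names are invented purely for this sketch:)

  #include <cstdio>

  // Toy dispatch mirroring the switch structure above. Without the
  // break, the maskload case would also execute the min/max handler.
  enum Kind { MaskLoad256, MaxPs512 };

  void dispatch(Kind K) {
    switch (K) {
    case MaskLoad256:
      std::printf("handleAVXMaskedLoad\n");
      break; // the line this patch adds; delete it and MaskLoad256
             // would fall through into the MaxPs512 handler below
    case MaxPs512:
      std::printf("maybeHandleSimpleNomemIntrinsic(trailingFlags=1)\n");
      break;
    }
  }

  int main() { dispatch(MaskLoad256); } // prints only the maskload line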
@@ -4630,9 +4633,8 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
// These AVX512 variants contain the rounding mode as a trailing flag.
// Earlier variants do not have a trailing flag and are already handled
// by maybeHandleSimpleNomemIntrinsic(I, 0) via handleUnknownIntrinsic.
- bool success = maybeHandleSimpleNomemIntrinsic(I, /* trailingFlags */ 1);
- (void)success;
- assert(success);
+ bool Success = maybeHandleSimpleNomemIntrinsic(I, /*trailingFlags=*/1);
+ assert(Success);
break;
}
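(To make the hunk above concrete: the heuristic previously rejected these intrinsics because it required every operand type to equal the return type, and the trailing i32 rounding mode breaks that rule. Below is a self-contained toy model of the relaxed check, with std::string standing in for llvm::Type and all names invented for this sketch; the real check runs on llvm::Type pointers inside the visitor:)

  #include <cassert>
  #include <cstdio>
  #include <string>
  #include <vector>

  // Leading operands must match the return type; a fixed number of
  // trailing operands (flags such as the AVX512 rounding mode) are
  // exempt from the type check.
  bool typesMatchExceptTrailing(const std::string &RetTy,
                                const std::vector<std::string> &ArgTys,
                                unsigned TrailingFlags) {
    assert(ArgTys.size() >= TrailingFlags);
    for (size_t I = 0; I < ArgTys.size() - TrailingFlags; ++I)
      if (ArgTys[I] != RetTy)
        return false;
    return true;
  }

  int main() {
    // llvm.x86.avx512.max.ps.512(<16 x float>, <16 x float>, i32 rounding)
    std::vector<std::string> Args = {"<16 x float>", "<16 x float>", "i32"};
    // The old heuristic counted the flag as a data operand and failed...
    std::printf("%d\n", typesMatchExceptTrailing("<16 x float>", Args, 0)); // 0
    // ...while allowing one trailing flag makes the match succeed.
    std::printf("%d\n", typesMatchExceptTrailing("<16 x float>", Args, 1)); // 1
  }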
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index 052b497831ee12..1bd5f61a316ea1 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -1870,18 +1870,10 @@ define <8 x double> @test_vmaxpd(<8 x double> %a0, <8 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[TMP7]]
;
%1 = call <8 x double> @llvm.x86.avx512.max.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
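(The test deltas in the hunk above and in every hunk that follows show the same behavioral change: the eager i512 compare-and-branch to __msan_warning_noreturn is replaced by plain shadow propagation, so a report fires only if the possibly-uninitialized result is actually consumed later. A toy model with shadows as bitmasks; function names are invented for this sketch, and the real pass builds IR values rather than scalars:)

  #include <cstdint>
  #include <cstdio>

  // Pre-patch: unrecognized intrinsic handled strictly. Warn eagerly
  // if *any* operand bit is uninitialized, even if the result is
  // never used.
  bool strictWouldWarn(uint64_t ShadowA, uint64_t ShadowB) {
    return ShadowA != 0 || ShadowB != 0;
  }

  // Post-patch: treated like simple SIMD arithmetic. The result's
  // shadow is the OR of all operand shadows; the constant rounding
  // mode contributes the all-zero shadow that appears as
  // `zeroinitializer` in the CHECK lines.
  uint64_t propagatedShadow(uint64_t ShadowA, uint64_t ShadowB,
                            uint64_t ShadowRounding) {
    return ShadowA | ShadowB | ShadowRounding;
  }

  int main() {
    uint64_t A = 0x00FF, B = 0, Rounding = 0; // A partially uninitialized
    std::printf("strict: warn=%d\n", strictWouldWarn(A, B) ? 1 : 0);
    std::printf("propagate: shadow=0x%llx\n",
                (unsigned long long)propagatedShadow(A, B, Rounding));
  }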
@@ -1894,18 +1886,10 @@ define <8 x double> @test_vminpd(<8 x double> %a0, <8 x double> %a1) #0 {
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i64> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <8 x i64> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <8 x i64> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> [[A0:%.*]], <8 x double> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <8 x i64> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[TMP7]]
;
%1 = call <8 x double> @llvm.x86.avx512.min.pd.512(<8 x double> %a0, <8 x double> %a1, i32 4)
@@ -3828,23 +3812,15 @@ define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x f
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
@@ -3863,23 +3839,15 @@ define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
@@ -3899,24 +3867,16 @@ define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x fl
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
@@ -3936,24 +3896,16 @@ define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
@@ -3971,18 +3923,10 @@ define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float>
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP7]]
;
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
@@ -3994,18 +3938,10 @@ define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x flo
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP7]]
;
%1 = call <16 x float> @llvm.x86.avx512.min.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)
@@ -4019,23 +3955,15 @@ define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x f
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
@@ -4054,23 +3982,15 @@ define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
-; CHECK: 6:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> zeroinitializer, <16 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP11:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[_MSPROP1]], <16 x i32> zeroinitializer
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
; CHECK-NEXT: [[TMP13:%.*]] = xor <16 x i32> [[TMP12]], zeroinitializer
-; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], zeroinitializer
+; CHECK-NEXT: [[TMP14:%.*]] = or <16 x i32> [[TMP13]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP15:%.*]] = or <16 x i32> [[TMP14]], zeroinitializer
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP9]], <16 x i32> [[TMP15]], <16 x i32> [[TMP11]]
; CHECK-NEXT: [[TMP16:%.*]] = select <16 x i1> [[TMP10]], <16 x float> [[TMP8]], <16 x float> zeroinitializer
@@ -4090,24 +4010,16 @@ define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x fl
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
@@ -4127,24 +4039,16 @@ define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP5]], 0
-; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP6]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
-; CHECK: 7:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> zeroinitializer, <16 x i32> [[TMP4]]
+; CHECK-NEXT: [[TMP12:%.*]] = select <16 x i1> [[TMP11]], <16 x i32> [[_MSPROP1]], <16 x i32> [[TMP4]]
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x float> [[SRC:%.*]] to <16 x i32>
; CHECK-NEXT: [[TMP15:%.*]] = xor <16 x i32> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], zeroinitializer
+; CHECK-NEXT: [[TMP16:%.*]] = or <16 x i32> [[TMP15]], [[_MSPROP1]]
; CHECK-NEXT: [[TMP17:%.*]] = or <16 x i32> [[TMP16]], [[TMP4]]
; CHECK-NEXT: [[_MSPROP_SELECT:%.*]] = select <16 x i1> [[TMP10]], <16 x i32> [[TMP17]], <16 x i32> [[TMP12]]
; CHECK-NEXT: [[TMP18:%.*]] = select <16 x i1> [[TMP11]], <16 x float> [[TMP9]], <16 x float> [[SRC]]
@@ -4162,18 +4066,10 @@ define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float>
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 8)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP7]]
;
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 8)
@@ -4185,18 +4081,10 @@ define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x flo
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to i512
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i512 [[TMP3]], 0
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to i512
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i512 [[TMP4]], 0
-; CHECK-NEXT: [[_MSOR:%.*]] = or i1 [[_MSCMP]], [[_MSCMP1]]
-; CHECK-NEXT: br i1 [[_MSOR]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK-NEXT: [[_MSPROP:%.*]] = or <16 x i32> [[TMP1]], [[TMP2]]
+; CHECK-NEXT: [[_MSPROP1:%.*]] = or <16 x i32> [[_MSPROP]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> [[A0:%.*]], <16 x float> [[A1:%.*]], i32 4)
-; CHECK-NEXT: store <16 x i32> zeroinitializer, ptr @__msan_retval_tls, align 8
+; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP7]]
;
%1 = call <16 x float> @llvm.x86.avx512.max.ps.512(<16 x float> %a0, <16 x float> %a1, i32 4)