[llvm] 7c66099 - [msan] Simplify 'maskedCheckAVXIndexShadow' (#147839)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jul 9 17:56:21 PDT 2025
Author: Thurston Dang
Date: 2025-07-09T17:56:16-07:00
New Revision: 7c66099545e84374d1df18da168ac12a4a9422cb
URL: https://github.com/llvm/llvm-project/commit/7c66099545e84374d1df18da168ac12a4a9422cb
DIFF: https://github.com/llvm/llvm-project/commit/7c66099545e84374d1df18da168ac12a4a9422cb.diff
LOG: [msan] Simplify 'maskedCheckAVXIndexShadow' (#147839)
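The current instrumentation has more `or` and element extraction than a
coal mine: it emits an extractelement/and/or triple for every element of
the index vector, even though only the last result feeds the check: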
```
[[TMP10:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0
[[TMP11:%.*]] = and i32 [[TMP10]], 15
[[TMP43:%.*]] = or i32 [[TMP10]], [[TMP11]]
[[TMP12:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1
[[TMP13:%.*]] = and i32 [[TMP12]], 15
[[TMP44:%.*]] = or i32 [[TMP12]], [[TMP13]]
...
[[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15
[[TMP41:%.*]] = and i32 [[TMP40]], 15
[[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
[[_MSCMP:%.*]] = icmp ne i32 [[TMP57]], 0
br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
```
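Simplify it to a single vector truncation that keeps only the index bits
that are actually used (log2 of the vector length), followed by one
comparison: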
```
[[TMP10:%.*]] = trunc <16 x i32> [[T]] to <16 x i4>
[[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
[[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
```
Added:
Modified:
llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll
llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 291bde54d9fcf..8b4276d55fa2a 100644
--- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -4307,23 +4307,26 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
setOrigin(&I, PtrSrcOrigin);
}
+ // Test whether the mask indices are initialized, only checking the bits that
+ // are actually used.
+ //
+ // e.g., if Idx is <32 x i16>, only (log2(32) == 5) bits of each index are
+ // used/checked.
void maskedCheckAVXIndexShadow(IRBuilder<> &IRB, Value *Idx, Instruction *I) {
+ assert(isFixedIntVector(Idx));
auto IdxVectorSize =
cast<FixedVectorType>(Idx->getType())->getNumElements();
assert(isPowerOf2_64(IdxVectorSize));
- auto *IdxVectorElemType =
- cast<FixedVectorType>(Idx->getType())->getElementType();
- Constant *IndexBits =
- ConstantInt::get(IdxVectorElemType, IdxVectorSize - 1);
- auto *IdxShadow = getShadow(Idx);
- // Only the low bits of Idx are used.
- Value *V = nullptr;
- for (size_t i = 0; i < IdxVectorSize; ++i) {
- V = IRB.CreateExtractElement(IdxShadow, i);
- assert(V->getType() == IndexBits->getType());
- V = IRB.CreateOr(V, IRB.CreateAnd(V, IndexBits));
- }
- insertCheckShadow(V, getOrigin(Idx), I);
+
+ // Compiler isn't smart enough, let's help it
+ if (auto *ConstantIdx = dyn_cast<Constant>(Idx))
+ return;
+
+ Value *Truncated = IRB.CreateTrunc(
+ Idx,
+ FixedVectorType::get(Type::getIntNTy(*MS.C, Log2_64(IdxVectorSize)),
+ IdxVectorSize));
+ insertCheckShadow(Truncated, getOrigin(Idx), I);
}
// Instrument AVX permutation intrinsic.
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
index 6542e56b3fefe..5bf529d7d32df 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx-intrinsics-x86.ll
@@ -948,23 +948,18 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 1
-; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP9]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 1
-; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <2 x i64> [[A1:%.*]] to <2 x i1>
; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
-; CHECK: 12:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x i1> [[TMP2]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 13:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES1]]
@@ -978,29 +973,18 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwi
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP3]], 3
-; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP15]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[A1:%.*]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
-; CHECK: 18:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i2> [[TMP2]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 19:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -1028,29 +1012,18 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP3]], 3
-; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP15]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 3
-; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3
-; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP14]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP18:%.*]], label [[TMP19:%.*]], !prof [[PROF1]]
-; CHECK: 18:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x i2> [[TMP2]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 19:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1074,27 +1047,17 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
; CHECK-NEXT: [[TMP6:%.*]] = xor i64 [[TMP5]], 87960930222080
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1
-; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 3
-; CHECK-NEXT: [[TMP13:%.*]] = or i32 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[A2]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP19]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP23:%.*]], label [[TMP24:%.*]], !prof [[PROF1]]
-; CHECK: 23:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i2> [[TMP8]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 13:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 24:
+; CHECK: 14:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
; CHECK-NEXT: store <4 x i32> [[TMP10]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1109,41 +1072,18 @@ declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP3]], 7
-; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP27]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 7
-; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 7
-; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
-; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i32> [[A1:%.*]] to <8 x i3>
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP26]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i3> [[TMP2]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES1]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
index e7854538aa3e8..0421d525890e7 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics-upgrade.ll
@@ -8141,41 +8141,18 @@ declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x
define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP3]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP3]], [[TMP27]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP29]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 7
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[TMP7]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP26]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i3> [[TMP2]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[TMP5]]
@@ -8188,43 +8165,20 @@ define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0,
;
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP5]], [[TMP29]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP8]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP8]], [[TMP31]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP36:%.*]] = and i64 [[TMP35]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP38]], 7
-; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP42:%.*]] = and i64 [[TMP41]], 7
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP28]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]]
-; CHECK: 32:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i3> [[TMP5]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 33:
+; CHECK: 10:
; CHECK-NEXT: [[TMP7:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X4:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -8247,42 +8201,19 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermilvar_pd_512(<8 x double> %x0
;
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP4]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP4]], [[TMP28]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP7]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP7]], [[TMP30]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7
-; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP40]], 7
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP40]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x double> [[TMP8]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]]
-; CHECK: 31:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i3> [[TMP4]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 32:
+; CHECK: 9:
; CHECK-NEXT: [[TMP6:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -8305,65 +8236,18 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x
define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP3]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP3]], [[TMP51]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP53]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 15
-; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP39]], [[TMP40]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP42]], [[TMP43]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP45]], [[TMP46]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP48]], [[TMP49]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[TMP7]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP50]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]]
-; CHECK: 54:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i4> [[TMP2]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP9:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 55:
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: store <16 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP5]]
@@ -8376,67 +8260,20 @@ define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0,
;
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermilvar_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP5]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP5]], [[TMP53]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP55:%.*]] = and i32 [[TMP8]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP8]], [[TMP55]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP60:%.*]] = and i32 [[TMP59]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP63:%.*]] = and i32 [[TMP62]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP66:%.*]] = and i32 [[TMP65]], 15
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP47]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP50]], [[TMP51]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP52]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]]
-; CHECK: 56:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i4> [[TMP5]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 57:
+; CHECK: 10:
; CHECK-NEXT: [[TMP7:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X4:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -8460,66 +8297,19 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermilvar_ps_512(<16 x float> %x0
;
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermilvar_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP4]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP4]], [[TMP52]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP54:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP7]], [[TMP54]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP59:%.*]] = and i32 [[TMP58]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP62:%.*]] = and i32 [[TMP61]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP64]], 15
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP64]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP50]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[TMP8:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[TMP8]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP51]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP55:%.*]], label [[TMP56:%.*]], !prof [[PROF1]]
-; CHECK: 55:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i4> [[TMP4]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP17:%.*]], label [[TMP18:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 56:
+; CHECK: 9:
; CHECK-NEXT: [[TMP6:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -13911,74 +13701,27 @@ define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
-; CHECK-NEXT: [[TMP14:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 5:
; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP14]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP14]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP14]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP45]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP45]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP14]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP14]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP14]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP14]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP14]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP14]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP14]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP14]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP14]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP14]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP14]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP14]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP14]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[TMP4]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]]
-; CHECK: 59:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 60:
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1]], <16 x i32> [[TMP4]])
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i4> [[TMP14]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP12:%.*]], label [[TMP13:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X4:%.*]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP10]]
@@ -13993,8 +13736,8 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 200) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
@@ -14007,61 +13750,15 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]]
-; CHECK: 59:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 60:
+; CHECK-NEXT: [[TMP18:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1]], <16 x i32> [[_MSLD]])
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i4> [[TMP18]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -14087,40 +13784,18 @@ define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP6]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP23]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double>
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP5]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1]], <8 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP36]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]]
-; CHECK: 32:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP6]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 33:
+; CHECK: 11:
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
; CHECK-NEXT: store <8 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
@@ -14138,40 +13813,18 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP7]], 7
-; CHECK-NEXT: [[TMP38:%.*]] = or i64 [[TMP7]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP24]], 7
-; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP24]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7
-; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP30]], 7
-; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 7
-; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7
-; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP36]], [[TMP37]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP6]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1]], <8 x double> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP43]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP44:%.*]], label [[TMP45:%.*]], !prof [[PROF1]]
-; CHECK: 33:
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i3> [[TMP7]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP21]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 11:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 34:
+; CHECK: 12:
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -14199,64 +13852,18 @@ define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP6]], [[TMP38]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP39]], [[TMP42]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]]
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP5]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1]], <16 x float> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]]
-; CHECK: 56:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP6]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
+; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 57:
+; CHECK: 11:
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
; CHECK-NEXT: store <16 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8
@@ -14274,64 +13881,18 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP40]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP52]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP69:%.*]] = or i32 [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP6]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1]], <16 x float> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP69]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]]
-; CHECK: 57:
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i4> [[TMP7]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 11:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 58:
+; CHECK: 12:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
@@ -14357,39 +13918,16 @@ define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP23]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
-; CHECK: 29:
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i3> [[TMP8]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 30:
+; CHECK: 7:
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[TMP4]]
@@ -14403,40 +13941,18 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i3> [[TMP13]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 9:
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -14460,75 +13976,28 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 5:
; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0:%.*]], <16 x i32> [[_MSLD]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]]
-; CHECK: 59:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 60:
+; CHECK-NEXT: [[TMP18:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0]], <16 x i32> [[_MSLD]])
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i4> [[TMP18]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -14554,15 +14023,14 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
-; CHECK: 6:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
+; CHECK: 5:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 7:
+; CHECK: 6:
; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
@@ -14572,40 +14040,18 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> [[EXTRA_PARAM:%.*]], double [[X2S]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> [[TMP5]], <8 x i32> zeroinitializer
; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> [[EXTRA_PARAM]], <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP27]], [[TMP12]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7
-; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP30]], 7
-; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP30]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7
-; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7
-; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <8 x i64> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP38]], 7
-; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <8 x i64> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP41:%.*]] = and i64 [[TMP40]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[TMP2]] to <8 x double>
; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <8 x double>
-; CHECK-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP11]], <8 x i64> [[X0:%.*]], <8 x double> [[TMP24]])
+; CHECK-NEXT: [[TMP13:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP11]], <8 x i64> [[X0]], <8 x double> [[TMP24]])
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x double> [[TMP13]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP47]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP48:%.*]], label [[TMP49:%.*]], !prof [[PROF1]]
-; CHECK: 39:
+; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i24 [[TMP25]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP26:%.*]], label [[TMP27:%.*]], !prof [[PROF1]]
+; CHECK: 16:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 40:
+; CHECK: 17:
; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0]], <8 x double> [[X2]])
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -14633,67 +14079,20 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0,
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP40]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP52]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP52]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0:%.*]], <16 x float> [[TMP6]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0]], <16 x float> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP19]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP68]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]]
-; CHECK: 57:
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i4> [[TMP7]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 58:
+; CHECK: 11:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -14719,40 +14118,17 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP13]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP31]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP31]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP13]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP13]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP13]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i3> [[TMP13]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -14775,63 +14151,16 @@ define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32
; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP36]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP53:%.*]], label [[TMP54:%.*]], !prof [[PROF1]]
-; CHECK: 53:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 54:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0]], <16 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i4> [[TMP8]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
@@ -14845,64 +14174,17 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP37]], [[TMP40]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15
-; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP45]], [[TMP46]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP13]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP47]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP47]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP13]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP13]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP13]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP13]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP13]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP13]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP13]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP13]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP13]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP13]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP13]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP35]], [[TMP51]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP36]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]]
-; CHECK: 54:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 55:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0]], <16 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i4> [[TMP13]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
index a5d387df59ff8..8cfca3b07300f 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512-intrinsics.ll
@@ -5495,74 +5495,27 @@ define <16 x i32>@test_int_x86_avx512_vpermi2var_d_512(<16 x i32> %x0, <16 x i32
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP4:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
-; CHECK: 4:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP3:%.*]], label [[TMP4:%.*]], !prof [[PROF1]]
+; CHECK: 3:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 5:
+; CHECK: 4:
; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
; CHECK-NEXT: [[TMP6:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP7:%.*]] = xor i64 [[TMP6]], 87960930222080
; CHECK-NEXT: [[TMP8:%.*]] = inttoptr i64 [[TMP7]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 64
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP41]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP41]], [[TMP10]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP12:%.*]] = and i32 [[TMP11]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP39]], [[TMP40]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP56]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP58:%.*]], label [[TMP59:%.*]], !prof [[PROF1]]
-; CHECK: 58:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 59:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1]], <16 x i32> [[_MSLD]])
+; CHECK-NEXT: [[TMP10:%.*]] = bitcast <16 x i4> [[TMP13]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP10]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP11:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 11:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 12:
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP9]]
@@ -5576,8 +5529,8 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
@@ -5590,61 +5543,15 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1:%.*]], <16 x i32> [[_MSLD]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]]
-; CHECK: 59:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 60:
+; CHECK-NEXT: [[TMP18:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X1]], <16 x i32> [[_MSLD]])
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i4> [[TMP18]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 13:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 14:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0:%.*]], <16 x i32> [[X1]], <16 x i32> [[X2]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -5670,42 +5577,19 @@ define <8 x double>@test_int_x86_avx512_vpermi2var_pd_512(<8 x double> %x0, <8 x
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP6]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP23]], [[TMP26]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP10]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP10]], [[TMP27]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double>
-; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP5]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP4]], <8 x i64> [[X1]], <8 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x double> [[TMP11]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]]
-; CHECK: 32:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i3> [[TMP6]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 33:
+; CHECK: 10:
; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]])
; CHECK-NEXT: store <8 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[TMP9]]
@@ -5721,40 +5605,18 @@ define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0,
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP7]], 7
-; CHECK-NEXT: [[TMP38:%.*]] = or i64 [[TMP7]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP24]], 7
-; CHECK-NEXT: [[TMP39:%.*]] = or i64 [[TMP24]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP28]], 7
-; CHECK-NEXT: [[TMP40:%.*]] = or i64 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP31:%.*]] = and i64 [[TMP30]], 7
-; CHECK-NEXT: [[TMP41:%.*]] = or i64 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP33:%.*]] = and i64 [[TMP32]], 7
-; CHECK-NEXT: [[TMP42:%.*]] = or i64 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP37:%.*]] = and i64 [[TMP36]], 7
-; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP36]], [[TMP37]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i64> [[TMP3]] to <8 x double>
-; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1:%.*]], <8 x double> [[TMP6]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP5]], <8 x i64> [[X1]], <8 x double> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x double> [[TMP9]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP43]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP44:%.*]], label [[TMP45:%.*]], !prof [[PROF1]]
-; CHECK: 33:
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <8 x i3> [[TMP7]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP21]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 11:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 34:
+; CHECK: 12:
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X0:%.*]], <8 x i64> [[X1]], <8 x double> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i64> [[X1]] to <8 x double>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -5783,66 +5645,19 @@ define <16 x float>@test_int_x86_avx512_vpermi2var_ps_512(<16 x float> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP6]], [[TMP38]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP39]], [[TMP42]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP10]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP10]], [[TMP43]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]]
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP6:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP5]])
+; CHECK-NEXT: [[TMP11:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP4]], <16 x i32> [[X1]], <16 x float> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x float> [[TMP11]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]]
-; CHECK: 56:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i4> [[TMP6]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP12:%.*]], label [[TMP10:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 57:
+; CHECK: 10:
; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: store <16 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[TMP9]]
@@ -5858,64 +5673,18 @@ define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0,
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP40]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP52]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP69:%.*]] = or i32 [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1:%.*]], <16 x float> [[TMP6]])
+; CHECK-NEXT: [[TMP9:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X1]], <16 x float> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP9]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP69]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]]
-; CHECK: 57:
+; CHECK-NEXT: [[TMP21:%.*]] = bitcast <16 x i4> [[TMP7]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP22:%.*]], label [[TMP23:%.*]], !prof [[PROF1]]
+; CHECK: 11:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 58:
+; CHECK: 12:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X0:%.*]], <16 x i32> [[X1]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <16 x i32> [[X1]] to <16 x float>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
@@ -5944,39 +5713,16 @@ define <8 x i64>@test_int_x86_avx512_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP23]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP29:%.*]], label [[TMP30:%.*]], !prof [[PROF1]]
-; CHECK: 29:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i3> [[TMP8]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 30:
+; CHECK: 7:
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[TMP4]]
@@ -5989,40 +5735,18 @@ define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X1]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i3> [[TMP13]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 9:
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0:%.*]], <8 x i64> [[X1]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -6045,75 +5769,28 @@ define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load i64, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP5:%.*]], label [[TMP6:%.*]], !prof [[PROF1]]
-; CHECK: 5:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP5:%.*]], !prof [[PROF1]]
+; CHECK: 4:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 6:
+; CHECK: 5:
; CHECK-NEXT: [[X2:%.*]] = load <16 x i32>, ptr [[X2P:%.*]], align 64
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2P]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
; CHECK-NEXT: [[TMP9:%.*]] = inttoptr i64 [[TMP8]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <16 x i32>, ptr [[TMP9]], align 64
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP42]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0:%.*]], <16 x i32> [[_MSLD]])
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP59:%.*]], label [[TMP60:%.*]], !prof [[PROF1]]
-; CHECK: 59:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 60:
+; CHECK-NEXT: [[TMP18:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP2]], <16 x i32> [[X0]], <16 x i32> [[_MSLD]])
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <16 x i4> [[TMP18]] to i64
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i64 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 12:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 13:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -6139,15 +5816,14 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 144) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 208) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 136) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP1]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP11:%.*]], !prof [[PROF1]]
-; CHECK: 7:
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP10:%.*]], label [[TMP12:%.*]], !prof [[PROF1]]
+; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 8:
+; CHECK: 7:
; CHECK-NEXT: [[X2S:%.*]] = load double, ptr [[X2PTR:%.*]], align 8
; CHECK-NEXT: [[TMP7:%.*]] = ptrtoint ptr [[X2PTR]] to i64
; CHECK-NEXT: [[TMP8:%.*]] = xor i64 [[TMP7]], 87960930222080
@@ -6157,40 +5833,18 @@ define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <
; CHECK-NEXT: [[X2INS:%.*]] = insertelement <8 x double> [[EXTRA_PARAM:%.*]], double [[X2S]], i32 0
; CHECK-NEXT: [[_MSPROP1:%.*]] = shufflevector <8 x i64> [[_MSPROP]], <8 x i64> [[TMP6]], <8 x i32> zeroinitializer
; CHECK-NEXT: [[X2:%.*]] = shufflevector <8 x double> [[X2INS]], <8 x double> [[EXTRA_PARAM2:%.*]], <8 x i32> zeroinitializer
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP12]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7
-; CHECK-NEXT: [[TMP43:%.*]] = or i64 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP31]], 7
-; CHECK-NEXT: [[TMP44:%.*]] = or i64 [[TMP31]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <8 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP36:%.*]] = and i64 [[TMP35]], 7
-; CHECK-NEXT: [[TMP45:%.*]] = or i64 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7
-; CHECK-NEXT: [[TMP46:%.*]] = or i64 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <8 x i64> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP40:%.*]] = and i64 [[TMP39]], 7
-; CHECK-NEXT: [[TMP47:%.*]] = or i64 [[TMP39]], [[TMP40]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <8 x i64> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP42:%.*]] = and i64 [[TMP41]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7
-; CHECK-NEXT: [[TMP48:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP11:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i3>
; CHECK-NEXT: [[TMP24:%.*]] = bitcast <8 x i64> [[TMP2]] to <8 x double>
; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i64> [[_MSPROP1]] to <8 x double>
-; CHECK-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP24]], <8 x i64> [[X0:%.*]], <8 x double> [[TMP13]])
+; CHECK-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[TMP24]], <8 x i64> [[X0]], <8 x double> [[TMP13]])
; CHECK-NEXT: [[TMP25:%.*]] = bitcast <8 x double> [[TMP14]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i64 [[TMP48]], 0
-; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP49:%.*]], label [[TMP50:%.*]], !prof [[PROF1]]
-; CHECK: 40:
+; CHECK-NEXT: [[TMP26:%.*]] = bitcast <8 x i3> [[TMP11]] to i24
+; CHECK-NEXT: [[_MSCMP2:%.*]] = icmp ne i24 [[TMP26]], 0
+; CHECK-NEXT: br i1 [[_MSCMP2]], label [[TMP27:%.*]], label [[TMP28:%.*]], !prof [[PROF1]]
+; CHECK: 17:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 41:
+; CHECK: 18:
; CHECK-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.x86.avx512.vpermi2var.pd.512(<8 x double> [[X1:%.*]], <8 x i64> [[X0]], <8 x double> [[X2]])
; CHECK-NEXT: [[TMP16:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP17:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -6217,67 +5871,20 @@ define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0,
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP9:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP7]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP40]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP52]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP52]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP64:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP65:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP66:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP67:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP68:%.*]] = or i32 [[TMP35]], [[TMP36]]
+; CHECK-NEXT: [[TMP7:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i32> [[TMP3]] to <16 x float>
-; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0:%.*]], <16 x float> [[TMP6]])
+; CHECK-NEXT: [[TMP19:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[TMP5]], <16 x i32> [[X0]], <16 x float> [[TMP6]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x float> [[TMP19]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP68]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP57:%.*]], label [[TMP58:%.*]], !prof [[PROF1]]
-; CHECK: 57:
+; CHECK-NEXT: [[TMP9:%.*]] = bitcast <16 x i4> [[TMP7]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP20:%.*]], label [[TMP21:%.*]], !prof [[PROF1]]
+; CHECK: 10:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 58:
+; CHECK: 11:
; CHECK-NEXT: [[TMP10:%.*]] = call <16 x float> @llvm.x86.avx512.vpermi2var.ps.512(<16 x float> [[X1:%.*]], <16 x i32> [[X0]], <16 x float> [[X2:%.*]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -6301,40 +5908,17 @@ define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i64> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP27]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i64> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP29]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i64 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <8 x i64> [[TMP13]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP31]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP31]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP13]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP13]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP13]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0:%.*]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP38:%.*]], label [[TMP39:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <8 x i64> [[X0:%.*]] to <8 x i3>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP1]], <8 x i64> [[X0]], <8 x i64> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <8 x i3> [[TMP13]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X1:%.*]], <8 x i64> [[X0]], <8 x i64> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[X3:%.*]] to <8 x i1>
@@ -6357,63 +5941,16 @@ define <16 x i32>@test_int_x86_avx512_vpermt2var_d_512(<16 x i32> %x0, <16 x i32
; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP8:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP8]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP36]], [[TMP5]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP8]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP8]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP39]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP8]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP8]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP8]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP8]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP8]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP8]], i64 8
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP8]], i64 9
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP8]], i64 10
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP8]], i64 11
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP8]], i64 12
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP8]], i64 13
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP8]], i64 14
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP8]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP34]], [[TMP50]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP35]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP53:%.*]], label [[TMP54:%.*]], !prof [[PROF1]]
-; CHECK: 53:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 54:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0]], <16 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i4> [[TMP8]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[TMP4]]
@@ -6426,64 +5963,17 @@ define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP13:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP37]], [[TMP40]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15
-; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP45]], [[TMP46]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP13]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP47]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP47]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP13]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP13]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP13]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP13]], i64 8
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP58:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP13]], i64 9
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP13]], i64 10
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP13]], i64 11
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP61:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP13]], i64 12
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP62:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP13]], i64 13
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP63:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP13]], i64 14
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP13]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP35]], [[TMP51]]
-; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0:%.*]], <16 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP28:%.*]] = icmp ne i32 [[TMP36]], 0
-; CHECK-NEXT: br i1 [[_MSCMP28]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]]
-; CHECK: 54:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
-; CHECK-NEXT: unreachable
-; CHECK: 55:
+; CHECK-NEXT: [[TMP13:%.*]] = trunc <16 x i32> [[X0:%.*]] to <16 x i4>
+; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP1]], <16 x i32> [[X0]], <16 x i32> [[TMP3]])
+; CHECK-NEXT: [[TMP14:%.*]] = bitcast <16 x i4> [[TMP13]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP15:%.*]], label [[TMP16:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
; CHECK-NEXT: [[TMP5:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X1:%.*]], <16 x i32> [[X0]], <16 x i32> [[X2:%.*]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16 [[TMP4]] to <16 x i1>
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16 [[X3:%.*]] to <16 x i1>
@@ -8988,41 +8478,18 @@ declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP3]], 7
-; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[TMP3]], [[TMP27]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP6]], 7
-; CHECK-NEXT: [[TMP8:%.*]] = or i64 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP9]], 7
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 7
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i64 [[TMP21]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP26]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP30:%.*]], label [[TMP31:%.*]], !prof [[PROF1]]
-; CHECK: 30:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x i3> [[TMP2]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 31:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: store <8 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x double> [[RES1]]
@@ -9034,43 +8501,20 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512(<8 x double> %x0, <8 x
define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_mask(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP5]], 7
-; CHECK-NEXT: [[TMP7:%.*]] = or i64 [[TMP5]], [[TMP29]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP30:%.*]] = and i64 [[TMP8]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP8]], [[TMP30]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP35:%.*]] = and i64 [[TMP34]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i64 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP38:%.*]] = and i64 [[TMP37]], 7
-; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 7
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i64 [[TMP23]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP28]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP32:%.*]], label [[TMP33:%.*]], !prof [[PROF1]]
-; CHECK: 32:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i3> [[TMP5]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 33:
+; CHECK: 10:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X3:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
@@ -9094,42 +8538,19 @@ define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_mask(<8 x double> %x0,
define <8 x double>@test_int_x86_avx512_vpermilvar_pd_512_maskz(<8 x double> %x0, <8 x i64> %x1, i8 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_pd_512_maskz(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP4]], 7
-; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[TMP4]], [[TMP28]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP29:%.*]] = and i64 [[TMP7]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP7]], [[TMP29]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP34:%.*]] = and i64 [[TMP33]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i64 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <8 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP36]], 7
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP36]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i64> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i64 [[TMP19]], 7
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i64> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i64 [[TMP25]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i64> [[X1:%.*]] to <8 x i3>
; CHECK-NEXT: [[X0:%.*]] = bitcast <8 x i64> [[TMP1]] to <8 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X0]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x double> [[RES]] to <8 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]]
-; CHECK: 31:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i3> [[TMP4]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 32:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[X2:%.*]], <8 x i64> [[X1]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[TMP3]] to <8 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
@@ -9154,65 +8575,18 @@ declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>
define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP3:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP3]], 15
-; CHECK-NEXT: [[TMP5:%.*]] = or i32 [[TMP3]], [[TMP51]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP6]], 15
-; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 15
-; CHECK-NEXT: [[TMP11:%.*]] = or i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 15
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 15
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 15
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 15
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP39:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP40:%.*]] = and i32 [[TMP39]], 15
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP39]], [[TMP40]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP43:%.*]] = and i32 [[TMP42]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP42]], [[TMP43]]
-; CHECK-NEXT: [[TMP45:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP45]], [[TMP46]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP48]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP48]], [[TMP49]]
+; CHECK-NEXT: [[TMP2:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP50]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP54:%.*]], label [[TMP55:%.*]], !prof [[PROF1]]
-; CHECK: 54:
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x i4> [[TMP2]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 55:
+; CHECK: 8:
; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: store <16 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x float> [[RES1]]
@@ -9224,67 +8598,20 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512(<16 x float> %x0, <16
define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_mask(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP5]], 15
-; CHECK-NEXT: [[TMP7:%.*]] = or i32 [[TMP5]], [[TMP53]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP54:%.*]] = and i32 [[TMP8]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP8]], [[TMP54]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP59:%.*]] = and i32 [[TMP58]], 15
-; CHECK-NEXT: [[TMP60:%.*]] = or i32 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP62:%.*]] = and i32 [[TMP61]], 15
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 15
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 15
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i32 [[TMP29]], 15
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i32 [[TMP35]], 15
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i32 [[TMP41]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i32 [[TMP44]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i32 [[TMP47]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i32 [[TMP50]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP50]], [[TMP51]]
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP52]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP56:%.*]], label [[TMP57:%.*]], !prof [[PROF1]]
-; CHECK: 56:
+; CHECK-NEXT: [[TMP8:%.*]] = bitcast <16 x i4> [[TMP5]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP8]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP16:%.*]], label [[TMP17:%.*]], !prof [[PROF1]]
+; CHECK: 9:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 57:
+; CHECK: 10:
; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X3:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
@@ -9308,66 +8635,19 @@ define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_mask(<16 x float> %x0,
define <16 x float>@test_int_x86_avx512_vpermilvar_ps_512_maskz(<16 x float> %x0, <16 x i32> %x1, i16 %mask) #0 {
; CHECK-LABEL: @test_int_x86_avx512_vpermilvar_ps_512_maskz(
; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP3:%.*]] = load i16, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <16 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP52:%.*]] = and i32 [[TMP4]], 15
-; CHECK-NEXT: [[TMP6:%.*]] = or i32 [[TMP4]], [[TMP52]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <16 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP53:%.*]] = and i32 [[TMP7]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP7]], [[TMP53]]
-; CHECK-NEXT: [[TMP57:%.*]] = extractelement <16 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP58:%.*]] = and i32 [[TMP57]], 15
-; CHECK-NEXT: [[TMP59:%.*]] = or i32 [[TMP57]], [[TMP58]]
-; CHECK-NEXT: [[TMP60:%.*]] = extractelement <16 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP60]], 15
-; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP60]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <16 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 15
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <16 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 15
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP2]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[TMP2]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP31]], 15
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP2]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i32> [[TMP2]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i32 [[TMP37]], 15
-; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP2]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i32> [[TMP2]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i32 [[TMP43]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i32> [[TMP2]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i32 [[TMP46]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i32> [[TMP2]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i32 [[TMP49]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP50]]
+; CHECK-NEXT: [[TMP4:%.*]] = trunc <16 x i32> [[X1:%.*]] to <16 x i4>
; CHECK-NEXT: [[X0:%.*]] = bitcast <16 x i32> [[TMP1]] to <16 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X0]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x float> [[RES]] to <16 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP51]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP55:%.*]], label [[TMP56:%.*]], !prof [[PROF1]]
-; CHECK: 55:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <16 x i4> [[TMP4]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR10]]
; CHECK-NEXT: unreachable
-; CHECK: 56:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[X2:%.*]], <16 x i32> [[X1]])
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i16 [[TMP3]] to <16 x i1>
; CHECK-NEXT: [[MASK_CAST:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
index db4ad6b8fc28b..a41f26a0e3c1c 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics-upgrade.ll
@@ -5096,111 +5096,16 @@ define <32 x i16> @test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i
; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31
-; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31
-; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31
-; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31
-; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31
-; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31
-; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31
-; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31
-; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31
-; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31
-; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31
-; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31
-; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31
-; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31
-; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31
-; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31
-; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31
-; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31
-; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31
-; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31
-; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31
-; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31
-; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31
-; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]]
-; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]]
-; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]]
-; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]]
-; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]]
-; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]]
-; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]]
-; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]]
-; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]]
-; CHECK: 101:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i5> [[TMP3]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
; CHECK-NEXT: unreachable
-; CHECK: 102:
+; CHECK: 7:
; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP103]]
@@ -5213,112 +5118,17 @@ define <32 x i16> @test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <3
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK: 8:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
@@ -5341,112 +5151,17 @@ define <32 x i16> @test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK: 8:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
@@ -5469,111 +5184,16 @@ define <32 x i16> @test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31
-; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31
-; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31
-; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31
-; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31
-; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31
-; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31
-; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31
-; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31
-; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31
-; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31
-; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31
-; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31
-; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31
-; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31
-; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31
-; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31
-; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31
-; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31
-; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31
-; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31
-; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31
-; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31
-; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]]
-; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]]
-; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]]
-; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]]
-; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]]
-; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]]
-; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]]
-; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]]
-; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]]
-; CHECK: 101:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X1:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i5> [[TMP3]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
; CHECK-NEXT: unreachable
-; CHECK: 102:
+; CHECK: 7:
; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP103]]
@@ -5586,112 +5206,18 @@ define <32 x i16> @test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <3
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X1:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR7]]
; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK: 9:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
index 636409fe20f83..e4c1e71721030 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512bw-intrinsics.ll
@@ -1477,111 +1477,16 @@ define <32 x i16>@test_int_x86_avx512_vpermt2var_hi_512(<32 x i16> %x0, <32 x i1
; CHECK-LABEL: @test_int_x86_avx512_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31
-; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31
-; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31
-; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31
-; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31
-; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31
-; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31
-; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31
-; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31
-; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31
-; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31
-; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31
-; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31
-; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31
-; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31
-; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31
-; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31
-; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31
-; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31
-; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31
-; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31
-; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31
-; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31
-; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]]
-; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]]
-; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]]
-; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]]
-; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]]
-; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]]
-; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]]
-; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]]
-; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]]
-; CHECK: 101:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 102:
+; CHECK-NEXT: call void @llvm.donothing()
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i5> [[TMP3]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP103]]
@@ -1594,112 +1499,17 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
@@ -1722,112 +1532,17 @@ define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <3
; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X0:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X0]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP6:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP6]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP7:%.*]], label [[TMP8:%.*]], !prof [[PROF1]]
+; CHECK: 7:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 8:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X1:%.*]], <32 x i16> [[X0]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
@@ -1852,111 +1567,16 @@ define <32 x i16>@test_int_x86_avx512_vpermi2var_hi_512(<32 x i16> %x0, <32 x i1
; CHECK-LABEL: @test_int_x86_avx512_vpermi2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP5:%.*]] = and i16 [[TMP4]], 31
-; CHECK-NEXT: [[TMP6:%.*]] = or i16 [[TMP4]], [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i16 [[TMP7]], 31
-; CHECK-NEXT: [[TMP9:%.*]] = or i16 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31
-; CHECK-NEXT: [[TMP12:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i16 [[TMP13]], 31
-; CHECK-NEXT: [[TMP15:%.*]] = or i16 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31
-; CHECK-NEXT: [[TMP18:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i16 [[TMP19]], 31
-; CHECK-NEXT: [[TMP21:%.*]] = or i16 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31
-; CHECK-NEXT: [[TMP24:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i16 [[TMP25]], 31
-; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31
-; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP32:%.*]] = and i16 [[TMP31]], 31
-; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP31]], [[TMP32]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31
-; CHECK-NEXT: [[TMP36:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP38:%.*]] = and i16 [[TMP37]], 31
-; CHECK-NEXT: [[TMP39:%.*]] = or i16 [[TMP37]], [[TMP38]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31
-; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP44:%.*]] = and i16 [[TMP43]], 31
-; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP43]], [[TMP44]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31
-; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP49:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP50:%.*]] = and i16 [[TMP49]], 31
-; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP49]], [[TMP50]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31
-; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP55:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP56:%.*]] = and i16 [[TMP55]], 31
-; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP55]], [[TMP56]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31
-; CHECK-NEXT: [[TMP60:%.*]] = or i16 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP62:%.*]] = and i16 [[TMP61]], 31
-; CHECK-NEXT: [[TMP63:%.*]] = or i16 [[TMP61]], [[TMP62]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31
-; CHECK-NEXT: [[TMP66:%.*]] = or i16 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP67:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP68:%.*]] = and i16 [[TMP67]], 31
-; CHECK-NEXT: [[TMP69:%.*]] = or i16 [[TMP67]], [[TMP68]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31
-; CHECK-NEXT: [[TMP72:%.*]] = or i16 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP73:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP74:%.*]] = and i16 [[TMP73]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP73]], [[TMP74]]
-; CHECK-NEXT: [[TMP76:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP77:%.*]] = and i16 [[TMP76]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP76]], [[TMP77]]
-; CHECK-NEXT: [[TMP79:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP80:%.*]] = and i16 [[TMP79]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP79]], [[TMP80]]
-; CHECK-NEXT: [[TMP82:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP83:%.*]] = and i16 [[TMP82]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP82]], [[TMP83]]
-; CHECK-NEXT: [[TMP85:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP86:%.*]] = and i16 [[TMP85]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP85]], [[TMP86]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP89:%.*]] = and i16 [[TMP88]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP88]], [[TMP89]]
-; CHECK-NEXT: [[TMP91:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP92:%.*]] = and i16 [[TMP91]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP91]], [[TMP92]]
-; CHECK-NEXT: [[TMP94:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP95:%.*]] = and i16 [[TMP94]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP94]], [[TMP95]]
-; CHECK-NEXT: [[TMP97:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP98:%.*]] = and i16 [[TMP97]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP97]], [[TMP98]]
-; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP99]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP101:%.*]], label [[TMP102:%.*]], !prof [[PROF1]]
-; CHECK: 101:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 102:
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <32 x i16> [[X1:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP100:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP5:%.*]] = bitcast <32 x i5> [[TMP3]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP6:%.*]], label [[TMP7:%.*]], !prof [[PROF1]]
+; CHECK: 6:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 7:
; CHECK-NEXT: [[TMP103:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: store <32 x i16> [[TMP100]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[TMP103]]
@@ -1969,112 +1589,18 @@ define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32
; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_hi_512(
; CHECK-NEXT: [[TMP1:%.*]] = load <32 x i16>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP2:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 128) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 192) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <32 x i16>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i16> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP6:%.*]] = and i16 [[TMP5]], 31
-; CHECK-NEXT: [[TMP7:%.*]] = or i16 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i16> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP9:%.*]] = and i16 [[TMP8]], 31
-; CHECK-NEXT: [[TMP10:%.*]] = or i16 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i16> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP12:%.*]] = and i16 [[TMP11]], 31
-; CHECK-NEXT: [[TMP13:%.*]] = or i16 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP16:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i16> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP18:%.*]] = and i16 [[TMP17]], 31
-; CHECK-NEXT: [[TMP19:%.*]] = or i16 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP22:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i16> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP24:%.*]] = and i16 [[TMP23]], 31
-; CHECK-NEXT: [[TMP25:%.*]] = or i16 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i16> [[TMP3]], i64 8
-; CHECK-NEXT: [[TMP30:%.*]] = and i16 [[TMP29]], 31
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP29]], [[TMP30]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP3]], i64 9
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP34:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i16> [[TMP3]], i64 10
-; CHECK-NEXT: [[TMP36:%.*]] = and i16 [[TMP35]], 31
-; CHECK-NEXT: [[TMP37:%.*]] = or i16 [[TMP35]], [[TMP36]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP3]], i64 11
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP40:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP41:%.*]] = extractelement <32 x i16> [[TMP3]], i64 12
-; CHECK-NEXT: [[TMP42:%.*]] = and i16 [[TMP41]], 31
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP41]], [[TMP42]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP3]], i64 13
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP47:%.*]] = extractelement <32 x i16> [[TMP3]], i64 14
-; CHECK-NEXT: [[TMP48:%.*]] = and i16 [[TMP47]], 31
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP47]], [[TMP48]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP3]], i64 15
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP53:%.*]] = extractelement <32 x i16> [[TMP3]], i64 16
-; CHECK-NEXT: [[TMP54:%.*]] = and i16 [[TMP53]], 31
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP53]], [[TMP54]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP3]], i64 17
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP58:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP59:%.*]] = extractelement <32 x i16> [[TMP3]], i64 18
-; CHECK-NEXT: [[TMP60:%.*]] = and i16 [[TMP59]], 31
-; CHECK-NEXT: [[TMP61:%.*]] = or i16 [[TMP59]], [[TMP60]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP3]], i64 19
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP64:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP65:%.*]] = extractelement <32 x i16> [[TMP3]], i64 20
-; CHECK-NEXT: [[TMP66:%.*]] = and i16 [[TMP65]], 31
-; CHECK-NEXT: [[TMP67:%.*]] = or i16 [[TMP65]], [[TMP66]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP3]], i64 21
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP70:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP71:%.*]] = extractelement <32 x i16> [[TMP3]], i64 22
-; CHECK-NEXT: [[TMP72:%.*]] = and i16 [[TMP71]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP71]], [[TMP72]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <32 x i16> [[TMP3]], i64 23
-; CHECK-NEXT: [[TMP75:%.*]] = and i16 [[TMP74]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP77:%.*]] = extractelement <32 x i16> [[TMP3]], i64 24
-; CHECK-NEXT: [[TMP78:%.*]] = and i16 [[TMP77]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP77]], [[TMP78]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <32 x i16> [[TMP3]], i64 25
-; CHECK-NEXT: [[TMP81:%.*]] = and i16 [[TMP80]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP83:%.*]] = extractelement <32 x i16> [[TMP3]], i64 26
-; CHECK-NEXT: [[TMP84:%.*]] = and i16 [[TMP83]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP83]], [[TMP84]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <32 x i16> [[TMP3]], i64 27
-; CHECK-NEXT: [[TMP87:%.*]] = and i16 [[TMP86]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP89:%.*]] = extractelement <32 x i16> [[TMP3]], i64 28
-; CHECK-NEXT: [[TMP90:%.*]] = and i16 [[TMP89]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP89]], [[TMP90]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <32 x i16> [[TMP3]], i64 29
-; CHECK-NEXT: [[TMP93:%.*]] = and i16 [[TMP92]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP95:%.*]] = extractelement <32 x i16> [[TMP3]], i64 30
-; CHECK-NEXT: [[TMP96:%.*]] = and i16 [[TMP95]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP95]], [[TMP96]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <32 x i16> [[TMP3]], i64 31
-; CHECK-NEXT: [[TMP99:%.*]] = and i16 [[TMP98]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1:%.*]], <32 x i16> [[TMP2]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP100]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP102:%.*]], label [[TMP103:%.*]], !prof [[PROF1]]
-; CHECK: 102:
-; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
-; CHECK-NEXT: unreachable
-; CHECK: 103:
+; CHECK-NEXT: [[TMP5:%.*]] = trunc <32 x i16> [[X1:%.*]] to <32 x i5>
+; CHECK-NEXT: [[TMP101:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP1]], <32 x i16> [[X1]], <32 x i16> [[TMP2]])
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <32 x i5> [[TMP5]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
+; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR8]]
+; CHECK-NEXT: unreachable
+; CHECK: 9:
; CHECK-NEXT: [[TMP104:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0:%.*]], <32 x i16> [[X1]], <32 x i16> [[X2:%.*]])
; CHECK-NEXT: [[TMP105:%.*]] = bitcast i32 [[TMP4]] to <32 x i1>
; CHECK-NEXT: [[TMP106:%.*]] = bitcast i32 [[X3:%.*]] to <32 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
index 7d45cec721246..3bbbabcb29778 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/avx512vl-intrinsics.ll
@@ -1902,27 +1902,16 @@ define <4 x i32>@test_int_x86_avx512_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP14]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP14]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[X1]], <4 x i32> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -1937,28 +1926,18 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP13]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP9]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X1]], <4 x i32> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i2> [[TMP9]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB9]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[X1]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -1985,27 +1964,16 @@ define <4 x i32>@test_int_x86_avx512_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP16:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP14]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP14]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[X0]], <4 x i32> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]])
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[TMP1]]
@@ -2020,28 +1988,17 @@ define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP13]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP13]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP9]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X0]], <4 x i32> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i2> [[TMP9]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2069,28 +2026,17 @@ define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x
; CHECK-SAME: <4 x i32> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i32> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP8]], <4 x i32> [[X0]], <4 x i32> [[TMP9]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i2> [[TMP14]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X1]], <4 x i32> [[X0]], <4 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2119,39 +2065,16 @@ define <8 x i32>@test_int_x86_avx512_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP22]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP22]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[X1]] to <8 x i3>
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[X1]], <8 x i32> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB29:.*]], label %[[BB30:.*]], !prof [[PROF1]]
-; CHECK: [[BB29]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i3> [[TMP3]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB30]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
@@ -2166,40 +2089,18 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP9]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP9]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP13]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP13]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP29]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP29]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <8 x i32> [[X1]] to <8 x i3>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X1]], <8 x i32> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
-; CHECK: [[BB30]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i3> [[TMP9]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB31]]:
+; CHECK: [[BB9]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[X1]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2223,39 +2124,16 @@ define <8 x i32>@test_int_x86_avx512_ask_vpermt2var_d_256(<8 x i32> %x0, <8 x i3
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7
-; CHECK-NEXT: [[TMP23:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i32 [[TMP22]], 7
-; CHECK-NEXT: [[TMP25:%.*]] = or i32 [[TMP22]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i32 [[TMP8]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[X0]] to <8 x i3>
; CHECK-NEXT: [[TMP4:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[X0]], <8 x i32> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB29:.*]], label %[[BB30:.*]], !prof [[PROF1]]
-; CHECK: [[BB29]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i3> [[TMP3]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB30]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]])
; CHECK-NEXT: store <8 x i32> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[TMP1]]
@@ -2270,40 +2148,17 @@ define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP9]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP9]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP13]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP13]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP29]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP29]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <8 x i32> [[X0]] to <8 x i3>
; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X0]], <8 x i32> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
-; CHECK: [[BB30]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <8 x i3> [[TMP9]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB31]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2328,40 +2183,17 @@ define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x
; CHECK-SAME: <8 x i32> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x i32> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP21:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP21]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP21]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP27]], [[TMP28]]
-; CHECK-NEXT: [[TMP29:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP29]], 7
-; CHECK-NEXT: [[TMP34:%.*]] = or i32 [[TMP29]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 7
-; CHECK-NEXT: [[TMP35:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP18:%.*]] = and i32 [[TMP17]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP19]], [[TMP20]]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <8 x i32> [[X0]] to <8 x i3>
; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP8]], <8 x i32> [[X0]], <8 x i32> [[TMP9]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP37]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB30:.*]], label %[[BB31:.*]], !prof [[PROF1]]
-; CHECK: [[BB30]]:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <8 x i3> [[TMP14]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP15]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB31]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X1]], <8 x i32> [[X0]], <8 x i32> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2387,24 +2219,19 @@ define <2 x double>@test_int_x86_avx512_vpermi2var_pd_128(<2 x double> %x0, <2 x
; CHECK-SAME: <2 x double> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x double> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP12:%.*]] = and i64 [[TMP11]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP11]], [[TMP12]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP6]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP6]], [[TMP13]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[X1]] to <2 x i1>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <2 x i64> [[TMP9]] to <2 x double>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[TMP4]] to <2 x double>
; CHECK-NEXT: [[TMP10:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[TMP8]], <2 x i64> [[X1]], <2 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x double> [[TMP10]] to <2 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB14:.*]], label %[[BB15:.*]], !prof [[PROF1]]
-; CHECK: [[BB14]]:
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB15]]:
+; CHECK: [[BB10]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[X0]], <2 x i64> [[X1]], <2 x double> [[X2]])
; CHECK-NEXT: store <2 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[TMP1]]
@@ -2422,22 +2249,18 @@ define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0,
; CHECK-NEXT: [[TMP13:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i64> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP15]], 1
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP15]], [[TMP19]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i64> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 1
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP22]], [[TMP23]]
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <2 x i64> [[X1]] to <2 x i1>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <2 x i64> [[TMP11]] to <2 x double>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i64> [[TMP8]] to <2 x double>
; CHECK-NEXT: [[TMP17:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[TMP9]], <2 x i64> [[X1]], <2 x double> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = bitcast <2 x double> [[TMP17]] to <2 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB15:.*]], label %[[BB16:.*]], !prof [[PROF1]]
-; CHECK: [[BB15]]:
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <2 x i1> [[TMP15]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB16]]:
+; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x double> @llvm.x86.avx512.vpermi2var.pd.128(<2 x double> [[X0]], <2 x i64> [[X1]], <2 x double> [[X2]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[X1]] to <2 x double>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -2470,30 +2293,19 @@ define <4 x double>@test_int_x86_avx512_vpermi2var_pd_256(<4 x double> %x0, <4 x
; CHECK-SAME: <4 x double> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x double> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP6]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP6]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP15]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP17]], 3
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP17]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i64> [[TMP9]] to <4 x double>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[TMP4]] to <4 x double>
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[TMP8]], <4 x i64> [[X1]], <4 x double> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x double> [[TMP10]] to <4 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB10]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[X0]], <4 x i64> [[X1]], <4 x double> [[X2]])
; CHECK-NEXT: store <4 x i64> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[TMP1]]
@@ -2511,28 +2323,18 @@ define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0,
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i64> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP15]], 3
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP15]], [[TMP19]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i64> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 3
-; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i64> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 3
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i64> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = and i64 [[TMP26]], 3
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i64> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i64> [[TMP11]] to <4 x double>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i64> [[TMP8]] to <4 x double>
; CHECK-NEXT: [[TMP17:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[TMP9]], <4 x i64> [[X1]], <4 x double> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x double> [[TMP17]] to <4 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP31]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]]
-; CHECK: [[BB21]]:
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i2> [[TMP15]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB22]]:
+; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x double> @llvm.x86.avx512.vpermi2var.pd.256(<4 x double> [[X0]], <4 x i64> [[X1]], <4 x double> [[X2]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i64> [[X1]] to <4 x double>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -2565,30 +2367,19 @@ define <4 x float>@test_int_x86_avx512_vpermi2var_ps_128(<4 x float> %x0, <4 x i
; CHECK-SAME: <4 x float> [[X0:%.*]], <4 x i32> [[X1:%.*]], <4 x float> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP6]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP6]], [[TMP14]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP17]], 3
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP17]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <4 x i32> [[TMP9]] to <4 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP4]] to <4 x float>
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP8]], <4 x i32> [[X1]], <4 x float> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x float> [[TMP10]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP21]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB20:.*]], label %[[BB21:.*]], !prof [[PROF1]]
-; CHECK: [[BB20]]:
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB21]]:
+; CHECK: [[BB10]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1]], <4 x float> [[X2]])
; CHECK-NEXT: store <4 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[TMP1]]
@@ -2606,28 +2397,18 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <
; CHECK-NEXT: [[TMP13:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP15]], [[TMP19]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 3
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <4 x i32> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 3
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <4 x i32> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 3
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP26]], [[TMP27]]
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i32> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x i32> [[TMP11]] to <4 x float>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i32> [[TMP8]] to <4 x float>
; CHECK-NEXT: [[TMP17:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP9]], <4 x i32> [[X1]], <4 x float> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x float> [[TMP17]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP31]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB21:.*]], label %[[BB22:.*]], !prof [[PROF1]]
-; CHECK: [[BB21]]:
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <4 x i2> [[TMP15]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB22]]:
+; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1]], <4 x float> [[X2]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[X1]] to <4 x float>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -2664,28 +2445,18 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %
; CHECK-NEXT: call void @llvm.donothing()
; CHECK-NEXT: [[TMP14:%.*]] = bitcast <2 x i64> [[TMP11]] to <4 x i32>
; CHECK-NEXT: [[X1CAST:%.*]] = bitcast <2 x i64> [[X1]] to <4 x i32>
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP14]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP20]], 3
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP20]], [[TMP23]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i32> [[TMP14]], i64 1
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP8]], 3
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP8]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <4 x i32> [[TMP14]], i64 2
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 3
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP25]], [[TMP26]]
-; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i32> [[TMP14]], i64 3
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP27]], 3
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP27]], [[TMP28]]
+; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i32> [[X1CAST]] to <4 x i2>
; CHECK-NEXT: [[TMP16:%.*]] = bitcast <4 x i32> [[TMP12]] to <4 x float>
; CHECK-NEXT: [[TMP18:%.*]] = bitcast <4 x i32> [[TMP13]] to <4 x float>
; CHECK-NEXT: [[TMP19:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[TMP16]], <4 x i32> [[X1CAST]], <4 x float> [[TMP18]])
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <4 x float> [[TMP19]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP32]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB22:.*]], label %[[BB23:.*]], !prof [[PROF1]]
-; CHECK: [[BB22]]:
+; CHECK-NEXT: [[TMP20:%.*]] = bitcast <4 x i2> [[TMP8]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP20]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
+; CHECK: [[BB12]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB23]]:
+; CHECK: [[BB13]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x float> @llvm.x86.avx512.vpermi2var.ps.128(<4 x float> [[X0]], <4 x i32> [[X1CAST]], <4 x float> [[X2]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[X1CAST]] to <4 x float>
; CHECK-NEXT: [[TMP15:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -2719,42 +2490,19 @@ define <8 x float>@test_int_x86_avx512_vpermi2var_ps_256(<8 x float> %x0, <8 x i
; CHECK-SAME: <8 x float> [[X0:%.*]], <8 x i32> [[X1:%.*]], <8 x float> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP9:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP4:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP22:%.*]] = and i32 [[TMP6]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP6]], [[TMP22]]
-; CHECK-NEXT: [[TMP23:%.*]] = extractelement <8 x i32> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP23]], [[TMP24]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP25]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP25]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP3]], i64 4
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP3]], i64 5
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP3]], i64 6
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP3]], i64 7
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[X1]] to <8 x i3>
; CHECK-NEXT: [[TMP8:%.*]] = bitcast <8 x i32> [[TMP9]] to <8 x float>
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i32> [[TMP4]] to <8 x float>
; CHECK-NEXT: [[TMP10:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[TMP8]], <8 x i32> [[X1]], <8 x float> [[TMP5]])
; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x float> [[TMP10]] to <8 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB32:.*]], label %[[BB33:.*]], !prof [[PROF1]]
-; CHECK: [[BB32]]:
+; CHECK-NEXT: [[TMP11:%.*]] = bitcast <8 x i3> [[TMP3]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP11]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB9:.*]], label %[[BB10:.*]], !prof [[PROF1]]
+; CHECK: [[BB9]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB33]]:
+; CHECK: [[BB10]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[X0]], <8 x i32> [[X1]], <8 x float> [[X2]])
; CHECK-NEXT: store <8 x i32> [[TMP7]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[TMP1]]
@@ -2772,40 +2520,18 @@ define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <
; CHECK-NEXT: [[TMP13:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP4:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP13]], i64 0
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7
-; CHECK-NEXT: [[TMP36:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP13]], i64 1
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7
-; CHECK-NEXT: [[TMP37:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <8 x i32> [[TMP13]], i64 2
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 7
-; CHECK-NEXT: [[TMP38:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <8 x i32> [[TMP13]], i64 3
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 7
-; CHECK-NEXT: [[TMP39:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <8 x i32> [[TMP13]], i64 4
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 7
-; CHECK-NEXT: [[TMP40:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <8 x i32> [[TMP13]], i64 5
-; CHECK-NEXT: [[TMP32:%.*]] = and i32 [[TMP15]], 7
-; CHECK-NEXT: [[TMP41:%.*]] = or i32 [[TMP15]], [[TMP32]]
-; CHECK-NEXT: [[TMP33:%.*]] = extractelement <8 x i32> [[TMP13]], i64 6
-; CHECK-NEXT: [[TMP34:%.*]] = and i32 [[TMP33]], 7
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP33]], [[TMP34]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP13]], i64 7
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP19]], 7
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP19]], [[TMP35]]
+; CHECK-NEXT: [[TMP15:%.*]] = trunc <8 x i32> [[X1]] to <8 x i3>
; CHECK-NEXT: [[TMP9:%.*]] = bitcast <8 x i32> [[TMP11]] to <8 x float>
; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i32> [[TMP8]] to <8 x float>
; CHECK-NEXT: [[TMP17:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[TMP9]], <8 x i32> [[X1]], <8 x float> [[TMP12]])
; CHECK-NEXT: [[TMP18:%.*]] = bitcast <8 x float> [[TMP17]] to <8 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP43]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB33:.*]], label %[[BB34:.*]], !prof [[PROF1]]
-; CHECK: [[BB33]]:
+; CHECK-NEXT: [[TMP19:%.*]] = bitcast <8 x i3> [[TMP15]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP19]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
+; CHECK: [[BB11]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB34]]:
+; CHECK: [[BB12]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.x86.avx512.vpermi2var.ps.256(<8 x float> [[X0]], <8 x i32> [[X1]], <8 x float> [[X2]])
; CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i32> [[X1]] to <8 x float>
; CHECK-NEXT: [[TMP14:%.*]] = bitcast i8 [[TMP4]] to <8 x i1>
@@ -2835,21 +2561,16 @@ define <2 x i64>@test_int_x86_avx512_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[X1]] to <2 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[X1]], <2 x i64> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
-; CHECK: [[BB11]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB12]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[X1]], <2 x i64> [[X2]])
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
@@ -2864,22 +2585,18 @@ define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP9]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP9]], [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <2 x i64> [[X1]] to <2 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X1]], <2 x i64> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i1> [[TMP9]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB9]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[X1]], <2 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2906,21 +2623,16 @@ define <2 x i64>@test_int_x86_avx512_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP7]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[X0]] to <2 x i1>
; CHECK-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[X0]], <2 x i64> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB11:.*]], label %[[BB12:.*]], !prof [[PROF1]]
-; CHECK: [[BB11]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB12]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]])
; CHECK-NEXT: store <2 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[TMP1]]
@@ -2935,22 +2647,17 @@ define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP9]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP9]], [[TMP13]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <2 x i64> [[X0]] to <2 x i1>
; CHECK-NEXT: [[TMP5:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X0]], <2 x i64> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <2 x i1> [[TMP9]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -2978,22 +2685,17 @@ define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x
; CHECK-SAME: <2 x i64> [[X0:%.*]], <2 x i64> [[X1:%.*]], <2 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 48) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <2 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 1
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <2 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 1
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <2 x i64> [[X0]] to <2 x i1>
; CHECK-NEXT: [[TMP13:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP8]], <2 x i64> [[X0]], <2 x i64> [[TMP9]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP19]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB12:.*]], label %[[BB13:.*]], !prof [[PROF1]]
-; CHECK: [[BB12]]:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <2 x i1> [[TMP14]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP15]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB13]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X1]], <2 x i64> [[X0]], <2 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -3022,27 +2724,16 @@ define <4 x i64>@test_int_x86_avx512_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
-; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP14]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[X1]], <4 x i64> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[X1]], <4 x i64> [[X2]])
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
@@ -3057,28 +2748,18 @@ define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
+; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP13]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i64> [[X1]] to <4 x i2>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP8]], <4 x i64> [[X1]], <4 x i64> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i2> [[TMP9]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB8:.*]], label %[[BB9:.*]], !prof [[PROF1]]
+; CHECK: [[BB8]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB9]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[X1]], <4 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -3105,27 +2786,16 @@ define <4 x i64>@test_int_x86_avx512_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
-; CHECK-NEXT: [[TMP16:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP7:%.*]] = and i64 [[TMP14]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP14]], [[TMP7]]
-; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP8]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP8]], [[TMP9]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP10]], [[TMP11]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[X0]], <4 x i64> [[TMP5]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB6:.*]], label %[[BB7:.*]], !prof [[PROF1]]
+; CHECK: [[BB6]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB7]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]])
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[TMP1]]
@@ -3140,28 +2810,17 @@ define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP6:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP13]], [[TMP16]]
-; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP18:%.*]] = and i64 [[TMP17]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP17]], [[TMP18]]
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP9]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP9]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i64> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP8]], <4 x i64> [[X0]], <4 x i64> [[TMP6]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i2> [[TMP9]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
@@ -3189,28 +2848,17 @@ define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x
; CHECK-SAME: <4 x i64> [[X0:%.*]], <4 x i64> [[X1:%.*]], <4 x i64> [[X2:%.*]], i8 [[X3:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP8:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP9:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 64) to ptr), align 8
-; CHECK-NEXT: [[TMP3:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
; CHECK-NEXT: [[TMP11:%.*]] = load i8, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 96) to ptr), align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP3]], i64 0
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 3
-; CHECK-NEXT: [[TMP22:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP3]], i64 1
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 3
-; CHECK-NEXT: [[TMP23:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i64> [[TMP3]], i64 2
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 3
-; CHECK-NEXT: [[TMP24:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i64> [[TMP3]], i64 3
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 3
-; CHECK-NEXT: [[TMP25:%.*]] = or i64 [[TMP20]], [[TMP21]]
+; CHECK-NEXT: [[TMP14:%.*]] = trunc <4 x i64> [[X0]] to <4 x i2>
; CHECK-NEXT: [[TMP13:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP8]], <4 x i64> [[X0]], <4 x i64> [[TMP9]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP25]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB18:.*]], label %[[BB19:.*]], !prof [[PROF1]]
-; CHECK: [[BB18]]:
+; CHECK-NEXT: [[TMP15:%.*]] = bitcast <4 x i2> [[TMP14]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP15]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB7:.*]], label %[[BB8:.*]], !prof [[PROF1]]
+; CHECK: [[BB7]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR6]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB19]]:
+; CHECK: [[BB8]]:
; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X1]], <4 x i64> [[X0]], <4 x i64> [[X2]])
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i8 [[TMP11]] to <8 x i1>
; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[X3]] to <8 x i1>
diff --git a/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll b/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll
index 95c1bbf251a6a..400499e5a9d93 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/X86/x86-vpermi2.ll
@@ -53,19 +53,15 @@ define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits(<2 x i64> %x0, <2 x i64> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]], <i64 0, i64 4>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[T]] to <2 x i1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1:![0-9]+]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i1> [[TMP10]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1:![0-9]+]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4:[0-9]+]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
@@ -89,19 +85,15 @@ define <2 x i64> @shuffle_vpermv3_v2i64_demandedbits_negative(<2 x i64> %x0, <2
; CHECK-NEXT: [[TMP8:%.*]] = or <2 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <2 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <2 x i64> [[M]], <i64 0, i64 2>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i64> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 1
-; CHECK-NEXT: [[TMP14:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i64> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 1
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP12]], [[TMP13]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <2 x i64> [[T]] to <2 x i1>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[TMP6]], <2 x i64> [[T]], <2 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB17:.*]], label %[[BB18:.*]], !prof [[PROF1]]
-; CHECK: [[BB17]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <2 x i1> [[TMP10]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB18]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <2 x i64> @llvm.x86.avx512.vpermi2var.q.128(<2 x i64> [[X0]], <2 x i64> [[T]], <2 x i64> [[X1]])
; CHECK-NEXT: store <2 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x i64> [[R]]
@@ -154,25 +146,15 @@ define <4 x i64> @shuffle_vpermv3_v4i64_demandedbits(<4 x i64> %x0, <4 x i64> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <4 x i64> [[M]], <i64 0, i64 8, i64 16, i64 32>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i64> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i64> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 3
-; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i64> [[T]] to <4 x i2>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[TMP6]], <4 x i64> [[T]], <4 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP21]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB23:.*]], label %[[BB24:.*]], !prof [[PROF1]]
-; CHECK: [[BB23]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i2> [[TMP10]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB24]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <4 x i64> @llvm.x86.avx512.vpermi2var.q.256(<4 x i64> [[X0]], <4 x i64> [[T]], <4 x i64> [[X1]])
; CHECK-NEXT: store <4 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i64> [[R]]
@@ -225,37 +207,15 @@ define <8 x i64> @shuffle_vpermv3_v8i64_demandedbits(<8 x i64> %x0, <8 x i64> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i64> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i64> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i64> [[M]], <i64 0, i64 16, i64 32, i64 64, i64 256, i64 512, i64 1024, i64 -16>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i64> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i64 [[TMP12]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i64> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i64 [[TMP14]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i64 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i64> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP16]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i64 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i64 [[TMP18]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i64 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i64> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i64 [[TMP20]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i64 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i64> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i64 [[TMP22]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i64> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i64 [[TMP24]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i64 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i64> [[T]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[TMP6]], <8 x i64> [[T]], <8 x i64> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP33]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]]
-; CHECK: [[BB35]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB36]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i64> @llvm.x86.avx512.vpermi2var.q.512(<8 x i64> [[X0]], <8 x i64> [[T]], <8 x i64> [[X1]])
; CHECK-NEXT: store <8 x i64> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i64> [[R]]
@@ -312,25 +272,15 @@ define <4 x i32> @shuffle_vpermv3_v4i32_demandedbits(<4 x i32> %x0, <4 x i32> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <4 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <4 x i32> [[M]], <i32 0, i32 8, i32 16, i32 32>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i32> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 3
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i32> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 3
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP16]], [[TMP17]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <4 x i32> [[T]] to <4 x i2>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[TMP6]], <4 x i32> [[T]], <4 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP21]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB23:.*]], label %[[BB24:.*]], !prof [[PROF1]]
-; CHECK: [[BB23]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <4 x i2> [[TMP10]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB24]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <4 x i32> @llvm.x86.avx512.vpermi2var.d.128(<4 x i32> [[X0]], <4 x i32> [[T]], <4 x i32> [[X1]])
; CHECK-NEXT: store <4 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x i32> [[R]]
@@ -383,37 +333,15 @@ define <8 x i32> @shuffle_vpermv3_v8i32_demandedbits(<8 x i32> %x0, <8 x i32> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i32> [[M]], <i32 0, i32 16, i32 32, i32 64, i32 256, i32 512, i32 -16, i32 -32>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i32> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i32> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i32> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i32> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i32> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i32 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i32> [[T]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[TMP6]], <8 x i32> [[T]], <8 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP33]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]]
-; CHECK: [[BB35]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB36]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i32> @llvm.x86.avx512.vpermi2var.d.256(<8 x i32> [[X0]], <8 x i32> [[T]], <8 x i32> [[X1]])
; CHECK-NEXT: store <8 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i32> [[R]]
@@ -466,61 +394,15 @@ define <16 x i32> @shuffle_vpermv3_v16i32_demandedbits(<16 x i32> %x0, <16 x i32
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i32> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i32> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i32> [[M]], <i32 0, i32 32, i32 64, i32 256, i32 512, i32 1024, i32 2048, i32 4096, i32 8192, i32 -32, i32 -64, i32 -128, i32 -256, i32 -512, i32 -1024, i32 -2048>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i32> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i32> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i32> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i32 [[TMP14]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i32 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i32> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i32> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i32 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i32> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i32 [[TMP20]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i32> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i32> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i32 [[TMP24]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i32 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i32> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i32 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i32 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i32 [[TMP28]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i32 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i32 [[TMP30]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i32 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i32 [[TMP32]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i32 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i32 [[TMP34]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i32 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i32> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i32 [[TMP36]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i32> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i32 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i32 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i32> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i32 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i32 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i32> [[T]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[TMP6]], <16 x i32> [[T]], <16 x i32> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]]
-; CHECK: [[BB59]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB60]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i32> @llvm.x86.avx512.vpermi2var.d.512(<16 x i32> [[X0]], <16 x i32> [[T]], <16 x i32> [[X1]])
; CHECK-NEXT: store <16 x i32> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i32> [[R]]
@@ -577,37 +459,15 @@ define <8 x i16> @shuffle_vpermv3_v8i16_demandedbits(<8 x i16> %x0, <8 x i16> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <8 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <8 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <8 x i16> [[M]], <i16 0, i16 16, i16 32, i16 64, i16 256, i16 512, i16 -16, i16 -32>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i16> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 7
-; CHECK-NEXT: [[TMP26:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 7
-; CHECK-NEXT: [[TMP28:%.*]] = or i16 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <8 x i16> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i16> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 7
-; CHECK-NEXT: [[TMP31:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <8 x i16> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 7
-; CHECK-NEXT: [[TMP32:%.*]] = or i16 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <8 x i16> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i16> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 7
-; CHECK-NEXT: [[TMP30:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <8 x i16> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 7
-; CHECK-NEXT: [[TMP33:%.*]] = or i16 [[TMP24]], [[TMP25]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <8 x i16> [[T]] to <8 x i3>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[TMP6]], <8 x i16> [[T]], <8 x i16> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP33]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB35:.*]], label %[[BB36:.*]], !prof [[PROF1]]
-; CHECK: [[BB35]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <8 x i3> [[TMP10]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB36]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> [[X0]], <8 x i16> [[T]], <8 x i16> [[X1]])
; CHECK-NEXT: store <8 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x i16> [[R]]
@@ -660,61 +520,15 @@ define <16 x i16> @shuffle_vpermv3_v16i16_demandedbits(<16 x i16> %x0, <16 x i16
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i16> [[M]], <i16 0, i16 32, i16 64, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 -32, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i16> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i16> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i16 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i16> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i16> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i16> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i16 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i16> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i16> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i16> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i16 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i16> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i16> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i16> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i16 [[TMP30]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i16 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i16> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i16> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i16> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i16 [[TMP36]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i16 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i16> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i16> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i16 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i16> [[T]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[TMP6]], <16 x i16> [[T]], <16 x i16> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i16 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]]
-; CHECK: [[BB59]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB60]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> [[X0]], <16 x i16> [[T]], <16 x i16> [[X1]])
; CHECK-NEXT: store <16 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i16> [[R]]
@@ -767,109 +581,15 @@ define <32 x i16> @shuffle_vpermv3_v32i16_demandedbits(<32 x i16> %x0, <32 x i16
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i16> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i16> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <32 x i16> [[M]], <i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096, i16 0, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096, i16 0, i16 -64, i16 -128, i16 -256, i16 -512, i16 -1024, i16 -2048, i16 -4096>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i16> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i16 [[TMP10]], 31
-; CHECK-NEXT: [[TMP74:%.*]] = or i16 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i16> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i16 [[TMP12]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i16 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i16> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i16 [[TMP14]], 31
-; CHECK-NEXT: [[TMP77:%.*]] = or i16 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i16> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i16 [[TMP16]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i16 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i16> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i16 [[TMP18]], 31
-; CHECK-NEXT: [[TMP80:%.*]] = or i16 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i16> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i16 [[TMP20]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i16 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i16> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i16 [[TMP22]], 31
-; CHECK-NEXT: [[TMP83:%.*]] = or i16 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i16> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i16 [[TMP24]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i16 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i16> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i16 [[TMP26]], 31
-; CHECK-NEXT: [[TMP86:%.*]] = or i16 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i16> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i16 [[TMP28]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i16 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i16> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i16 [[TMP30]], 31
-; CHECK-NEXT: [[TMP89:%.*]] = or i16 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i16> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i16 [[TMP32]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i16 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i16> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i16 [[TMP34]], 31
-; CHECK-NEXT: [[TMP92:%.*]] = or i16 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i16> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i16 [[TMP36]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i16 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i16> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i16 [[TMP38]], 31
-; CHECK-NEXT: [[TMP95:%.*]] = or i16 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i16> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i16 [[TMP40]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i16 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i16> [[TMP9]], i64 16
-; CHECK-NEXT: [[TMP43:%.*]] = and i16 [[TMP42]], 31
-; CHECK-NEXT: [[TMP98:%.*]] = or i16 [[TMP42]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i16> [[TMP9]], i64 17
-; CHECK-NEXT: [[TMP45:%.*]] = and i16 [[TMP44]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i16 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i16> [[TMP9]], i64 18
-; CHECK-NEXT: [[TMP47:%.*]] = and i16 [[TMP46]], 31
-; CHECK-NEXT: [[TMP101:%.*]] = or i16 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i16> [[TMP9]], i64 19
-; CHECK-NEXT: [[TMP49:%.*]] = and i16 [[TMP48]], 31
-; CHECK-NEXT: [[TMP103:%.*]] = or i16 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i16> [[TMP9]], i64 20
-; CHECK-NEXT: [[TMP51:%.*]] = and i16 [[TMP50]], 31
-; CHECK-NEXT: [[TMP105:%.*]] = or i16 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i16> [[TMP9]], i64 21
-; CHECK-NEXT: [[TMP53:%.*]] = and i16 [[TMP52]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i16 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i16> [[TMP9]], i64 22
-; CHECK-NEXT: [[TMP55:%.*]] = and i16 [[TMP54]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i16 [[TMP54]], [[TMP55]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i16> [[TMP9]], i64 23
-; CHECK-NEXT: [[TMP57:%.*]] = and i16 [[TMP56]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i16 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i16> [[TMP9]], i64 24
-; CHECK-NEXT: [[TMP59:%.*]] = and i16 [[TMP58]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i16 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i16> [[TMP9]], i64 25
-; CHECK-NEXT: [[TMP61:%.*]] = and i16 [[TMP60]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i16 [[TMP60]], [[TMP61]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i16> [[TMP9]], i64 26
-; CHECK-NEXT: [[TMP63:%.*]] = and i16 [[TMP62]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i16 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i16> [[TMP9]], i64 27
-; CHECK-NEXT: [[TMP65:%.*]] = and i16 [[TMP64]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i16 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i16> [[TMP9]], i64 28
-; CHECK-NEXT: [[TMP67:%.*]] = and i16 [[TMP66]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i16 [[TMP66]], [[TMP67]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i16> [[TMP9]], i64 29
-; CHECK-NEXT: [[TMP69:%.*]] = and i16 [[TMP68]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i16 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i16> [[TMP9]], i64 30
-; CHECK-NEXT: [[TMP71:%.*]] = and i16 [[TMP70]], 31
-; CHECK-NEXT: [[TMP102:%.*]] = or i16 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i16> [[TMP9]], i64 31
-; CHECK-NEXT: [[TMP104:%.*]] = and i16 [[TMP72]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i16 [[TMP72]], [[TMP104]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <32 x i16> [[T]] to <32 x i5>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[TMP6]], <32 x i16> [[T]], <32 x i16> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP60:%.*]] = icmp ne i16 [[TMP73]], 0
-; CHECK-NEXT: br i1 [[_MSCMP60]], label %[[BB107:.*]], label %[[BB108:.*]], !prof [[PROF1]]
-; CHECK: [[BB107]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i5> [[TMP10]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB108]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> [[X0]], <32 x i16> [[T]], <32 x i16> [[X1]])
; CHECK-NEXT: store <32 x i16> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i16> [[R]]
@@ -926,61 +646,15 @@ define <16 x i8> @shuffle_vpermv3_v16i8_demandedbits(<16 x i8> %x0, <16 x i8> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <16 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <16 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <16 x i8> [[M]], <i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128, i8 0, i8 32, i8 64, i8 -128, i8 0, i8 -32, i8 -64, i8 -128>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <16 x i8> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 15
-; CHECK-NEXT: [[TMP43:%.*]] = or i8 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <16 x i8> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 15
-; CHECK-NEXT: [[TMP44:%.*]] = or i8 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i8> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 15
-; CHECK-NEXT: [[TMP46:%.*]] = or i8 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <16 x i8> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 15
-; CHECK-NEXT: [[TMP47:%.*]] = or i8 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i8> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 15
-; CHECK-NEXT: [[TMP49:%.*]] = or i8 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i8> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 15
-; CHECK-NEXT: [[TMP50:%.*]] = or i8 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <16 x i8> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 15
-; CHECK-NEXT: [[TMP52:%.*]] = or i8 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <16 x i8> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 15
-; CHECK-NEXT: [[TMP53:%.*]] = or i8 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i8> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 15
-; CHECK-NEXT: [[TMP55:%.*]] = or i8 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i8> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 15
-; CHECK-NEXT: [[TMP56:%.*]] = or i8 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <16 x i8> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 15
-; CHECK-NEXT: [[TMP42:%.*]] = or i8 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i8> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 15
-; CHECK-NEXT: [[TMP45:%.*]] = or i8 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i8> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 15
-; CHECK-NEXT: [[TMP48:%.*]] = or i8 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <16 x i8> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 15
-; CHECK-NEXT: [[TMP51:%.*]] = or i8 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i8> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 15
-; CHECK-NEXT: [[TMP54:%.*]] = or i8 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i8> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 15
-; CHECK-NEXT: [[TMP57:%.*]] = or i8 [[TMP40]], [[TMP41]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <16 x i8> [[T]] to <16 x i4>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[TMP6]], <16 x i8> [[T]], <16 x i8> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP57]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB59:.*]], label %[[BB60:.*]], !prof [[PROF1]]
-; CHECK: [[BB59]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <16 x i4> [[TMP10]] to i64
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB60]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <16 x i8> @llvm.x86.avx512.vpermi2var.qi.128(<16 x i8> [[X0]], <16 x i8> [[T]], <16 x i8> [[X1]])
; CHECK-NEXT: store <16 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <16 x i8> [[R]]
@@ -1033,109 +707,15 @@ define <32 x i8> @shuffle_vpermv3_v32i8_demandedbits(<32 x i8> %x0, <32 x i8> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <32 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <32 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <32 x i8> [[M]], <i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128, i8 0, i8 0, i8 64, i8 -128, i8 0, i8 0, i8 -64, i8 -128>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i8> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 31
-; CHECK-NEXT: [[TMP74:%.*]] = or i8 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i8> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 31
-; CHECK-NEXT: [[TMP76:%.*]] = or i8 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i8> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 31
-; CHECK-NEXT: [[TMP77:%.*]] = or i8 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i8> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 31
-; CHECK-NEXT: [[TMP79:%.*]] = or i8 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i8> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 31
-; CHECK-NEXT: [[TMP80:%.*]] = or i8 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i8> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 31
-; CHECK-NEXT: [[TMP82:%.*]] = or i8 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i8> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 31
-; CHECK-NEXT: [[TMP83:%.*]] = or i8 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i8> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 31
-; CHECK-NEXT: [[TMP85:%.*]] = or i8 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i8> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 31
-; CHECK-NEXT: [[TMP86:%.*]] = or i8 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i8> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 31
-; CHECK-NEXT: [[TMP88:%.*]] = or i8 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i8> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 31
-; CHECK-NEXT: [[TMP89:%.*]] = or i8 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i8> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 31
-; CHECK-NEXT: [[TMP91:%.*]] = or i8 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <32 x i8> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 31
-; CHECK-NEXT: [[TMP92:%.*]] = or i8 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <32 x i8> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 31
-; CHECK-NEXT: [[TMP94:%.*]] = or i8 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <32 x i8> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 31
-; CHECK-NEXT: [[TMP95:%.*]] = or i8 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i8> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 31
-; CHECK-NEXT: [[TMP97:%.*]] = or i8 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <32 x i8> [[TMP9]], i64 16
-; CHECK-NEXT: [[TMP43:%.*]] = and i8 [[TMP42]], 31
-; CHECK-NEXT: [[TMP98:%.*]] = or i8 [[TMP42]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <32 x i8> [[TMP9]], i64 17
-; CHECK-NEXT: [[TMP45:%.*]] = and i8 [[TMP44]], 31
-; CHECK-NEXT: [[TMP100:%.*]] = or i8 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <32 x i8> [[TMP9]], i64 18
-; CHECK-NEXT: [[TMP47:%.*]] = and i8 [[TMP46]], 31
-; CHECK-NEXT: [[TMP101:%.*]] = or i8 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <32 x i8> [[TMP9]], i64 19
-; CHECK-NEXT: [[TMP49:%.*]] = and i8 [[TMP48]], 31
-; CHECK-NEXT: [[TMP103:%.*]] = or i8 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <32 x i8> [[TMP9]], i64 20
-; CHECK-NEXT: [[TMP51:%.*]] = and i8 [[TMP50]], 31
-; CHECK-NEXT: [[TMP105:%.*]] = or i8 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <32 x i8> [[TMP9]], i64 21
-; CHECK-NEXT: [[TMP53:%.*]] = and i8 [[TMP52]], 31
-; CHECK-NEXT: [[TMP75:%.*]] = or i8 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP54:%.*]] = extractelement <32 x i8> [[TMP9]], i64 22
-; CHECK-NEXT: [[TMP55:%.*]] = and i8 [[TMP54]], 31
-; CHECK-NEXT: [[TMP78:%.*]] = or i8 [[TMP54]], [[TMP55]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <32 x i8> [[TMP9]], i64 23
-; CHECK-NEXT: [[TMP57:%.*]] = and i8 [[TMP56]], 31
-; CHECK-NEXT: [[TMP81:%.*]] = or i8 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <32 x i8> [[TMP9]], i64 24
-; CHECK-NEXT: [[TMP59:%.*]] = and i8 [[TMP58]], 31
-; CHECK-NEXT: [[TMP84:%.*]] = or i8 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP60:%.*]] = extractelement <32 x i8> [[TMP9]], i64 25
-; CHECK-NEXT: [[TMP61:%.*]] = and i8 [[TMP60]], 31
-; CHECK-NEXT: [[TMP87:%.*]] = or i8 [[TMP60]], [[TMP61]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <32 x i8> [[TMP9]], i64 26
-; CHECK-NEXT: [[TMP63:%.*]] = and i8 [[TMP62]], 31
-; CHECK-NEXT: [[TMP90:%.*]] = or i8 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <32 x i8> [[TMP9]], i64 27
-; CHECK-NEXT: [[TMP65:%.*]] = and i8 [[TMP64]], 31
-; CHECK-NEXT: [[TMP93:%.*]] = or i8 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP66:%.*]] = extractelement <32 x i8> [[TMP9]], i64 28
-; CHECK-NEXT: [[TMP67:%.*]] = and i8 [[TMP66]], 31
-; CHECK-NEXT: [[TMP96:%.*]] = or i8 [[TMP66]], [[TMP67]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <32 x i8> [[TMP9]], i64 29
-; CHECK-NEXT: [[TMP69:%.*]] = and i8 [[TMP68]], 31
-; CHECK-NEXT: [[TMP99:%.*]] = or i8 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <32 x i8> [[TMP9]], i64 30
-; CHECK-NEXT: [[TMP71:%.*]] = and i8 [[TMP70]], 31
-; CHECK-NEXT: [[TMP102:%.*]] = or i8 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP72:%.*]] = extractelement <32 x i8> [[TMP9]], i64 31
-; CHECK-NEXT: [[TMP104:%.*]] = and i8 [[TMP72]], 31
-; CHECK-NEXT: [[TMP73:%.*]] = or i8 [[TMP72]], [[TMP104]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <32 x i8> [[T]] to <32 x i5>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[TMP6]], <32 x i8> [[T]], <32 x i8> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP60:%.*]] = icmp ne i8 [[TMP73]], 0
-; CHECK-NEXT: br i1 [[_MSCMP60]], label %[[BB107:.*]], label %[[BB108:.*]], !prof [[PROF1]]
-; CHECK: [[BB107]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <32 x i5> [[TMP10]] to i160
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i160 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB108]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <32 x i8> @llvm.x86.avx512.vpermi2var.qi.256(<32 x i8> [[X0]], <32 x i8> [[T]], <32 x i8> [[X1]])
; CHECK-NEXT: store <32 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <32 x i8> [[R]]
@@ -1188,205 +768,15 @@ define <64 x i8> @shuffle_vpermv3_v64i8_demandedbits(<64 x i8> %x0, <64 x i8> %x
; CHECK-NEXT: [[TMP8:%.*]] = or <64 x i8> [[TMP5]], [[TMP2]]
; CHECK-NEXT: [[TMP9:%.*]] = or <64 x i8> [[TMP8]], [[TMP7]]
; CHECK-NEXT: [[T:%.*]] = or <64 x i8> [[M]], <i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128, i8 0, i8 -128>
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <64 x i8> [[TMP9]], i64 0
-; CHECK-NEXT: [[TMP11:%.*]] = and i8 [[TMP10]], 63
-; CHECK-NEXT: [[TMP139:%.*]] = or i8 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <64 x i8> [[TMP9]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i8 [[TMP12]], 63
-; CHECK-NEXT: [[TMP140:%.*]] = or i8 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP14:%.*]] = extractelement <64 x i8> [[TMP9]], i64 2
-; CHECK-NEXT: [[TMP15:%.*]] = and i8 [[TMP14]], 63
-; CHECK-NEXT: [[TMP142:%.*]] = or i8 [[TMP14]], [[TMP15]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <64 x i8> [[TMP9]], i64 3
-; CHECK-NEXT: [[TMP17:%.*]] = and i8 [[TMP16]], 63
-; CHECK-NEXT: [[TMP143:%.*]] = or i8 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <64 x i8> [[TMP9]], i64 4
-; CHECK-NEXT: [[TMP19:%.*]] = and i8 [[TMP18]], 63
-; CHECK-NEXT: [[TMP145:%.*]] = or i8 [[TMP18]], [[TMP19]]
-; CHECK-NEXT: [[TMP20:%.*]] = extractelement <64 x i8> [[TMP9]], i64 5
-; CHECK-NEXT: [[TMP21:%.*]] = and i8 [[TMP20]], 63
-; CHECK-NEXT: [[TMP146:%.*]] = or i8 [[TMP20]], [[TMP21]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <64 x i8> [[TMP9]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i8 [[TMP22]], 63
-; CHECK-NEXT: [[TMP148:%.*]] = or i8 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP24:%.*]] = extractelement <64 x i8> [[TMP9]], i64 7
-; CHECK-NEXT: [[TMP25:%.*]] = and i8 [[TMP24]], 63
-; CHECK-NEXT: [[TMP149:%.*]] = or i8 [[TMP24]], [[TMP25]]
-; CHECK-NEXT: [[TMP26:%.*]] = extractelement <64 x i8> [[TMP9]], i64 8
-; CHECK-NEXT: [[TMP27:%.*]] = and i8 [[TMP26]], 63
-; CHECK-NEXT: [[TMP151:%.*]] = or i8 [[TMP26]], [[TMP27]]
-; CHECK-NEXT: [[TMP28:%.*]] = extractelement <64 x i8> [[TMP9]], i64 9
-; CHECK-NEXT: [[TMP29:%.*]] = and i8 [[TMP28]], 63
-; CHECK-NEXT: [[TMP152:%.*]] = or i8 [[TMP28]], [[TMP29]]
-; CHECK-NEXT: [[TMP30:%.*]] = extractelement <64 x i8> [[TMP9]], i64 10
-; CHECK-NEXT: [[TMP31:%.*]] = and i8 [[TMP30]], 63
-; CHECK-NEXT: [[TMP154:%.*]] = or i8 [[TMP30]], [[TMP31]]
-; CHECK-NEXT: [[TMP32:%.*]] = extractelement <64 x i8> [[TMP9]], i64 11
-; CHECK-NEXT: [[TMP33:%.*]] = and i8 [[TMP32]], 63
-; CHECK-NEXT: [[TMP155:%.*]] = or i8 [[TMP32]], [[TMP33]]
-; CHECK-NEXT: [[TMP34:%.*]] = extractelement <64 x i8> [[TMP9]], i64 12
-; CHECK-NEXT: [[TMP35:%.*]] = and i8 [[TMP34]], 63
-; CHECK-NEXT: [[TMP157:%.*]] = or i8 [[TMP34]], [[TMP35]]
-; CHECK-NEXT: [[TMP36:%.*]] = extractelement <64 x i8> [[TMP9]], i64 13
-; CHECK-NEXT: [[TMP37:%.*]] = and i8 [[TMP36]], 63
-; CHECK-NEXT: [[TMP158:%.*]] = or i8 [[TMP36]], [[TMP37]]
-; CHECK-NEXT: [[TMP38:%.*]] = extractelement <64 x i8> [[TMP9]], i64 14
-; CHECK-NEXT: [[TMP39:%.*]] = and i8 [[TMP38]], 63
-; CHECK-NEXT: [[TMP160:%.*]] = or i8 [[TMP38]], [[TMP39]]
-; CHECK-NEXT: [[TMP40:%.*]] = extractelement <64 x i8> [[TMP9]], i64 15
-; CHECK-NEXT: [[TMP41:%.*]] = and i8 [[TMP40]], 63
-; CHECK-NEXT: [[TMP161:%.*]] = or i8 [[TMP40]], [[TMP41]]
-; CHECK-NEXT: [[TMP42:%.*]] = extractelement <64 x i8> [[TMP9]], i64 16
-; CHECK-NEXT: [[TMP43:%.*]] = and i8 [[TMP42]], 63
-; CHECK-NEXT: [[TMP163:%.*]] = or i8 [[TMP42]], [[TMP43]]
-; CHECK-NEXT: [[TMP44:%.*]] = extractelement <64 x i8> [[TMP9]], i64 17
-; CHECK-NEXT: [[TMP45:%.*]] = and i8 [[TMP44]], 63
-; CHECK-NEXT: [[TMP164:%.*]] = or i8 [[TMP44]], [[TMP45]]
-; CHECK-NEXT: [[TMP46:%.*]] = extractelement <64 x i8> [[TMP9]], i64 18
-; CHECK-NEXT: [[TMP47:%.*]] = and i8 [[TMP46]], 63
-; CHECK-NEXT: [[TMP166:%.*]] = or i8 [[TMP46]], [[TMP47]]
-; CHECK-NEXT: [[TMP48:%.*]] = extractelement <64 x i8> [[TMP9]], i64 19
-; CHECK-NEXT: [[TMP49:%.*]] = and i8 [[TMP48]], 63
-; CHECK-NEXT: [[TMP167:%.*]] = or i8 [[TMP48]], [[TMP49]]
-; CHECK-NEXT: [[TMP50:%.*]] = extractelement <64 x i8> [[TMP9]], i64 20
-; CHECK-NEXT: [[TMP51:%.*]] = and i8 [[TMP50]], 63
-; CHECK-NEXT: [[TMP169:%.*]] = or i8 [[TMP50]], [[TMP51]]
-; CHECK-NEXT: [[TMP52:%.*]] = extractelement <64 x i8> [[TMP9]], i64 21
-; CHECK-NEXT: [[TMP53:%.*]] = and i8 [[TMP52]], 63
-; CHECK-NEXT: [[TMP170:%.*]] = or i8 [[TMP52]], [[TMP53]]
-; CHECK-NEXT: [[TMP54:%.*]] = extractelement <64 x i8> [[TMP9]], i64 22
-; CHECK-NEXT: [[TMP55:%.*]] = and i8 [[TMP54]], 63
-; CHECK-NEXT: [[TMP172:%.*]] = or i8 [[TMP54]], [[TMP55]]
-; CHECK-NEXT: [[TMP56:%.*]] = extractelement <64 x i8> [[TMP9]], i64 23
-; CHECK-NEXT: [[TMP57:%.*]] = and i8 [[TMP56]], 63
-; CHECK-NEXT: [[TMP173:%.*]] = or i8 [[TMP56]], [[TMP57]]
-; CHECK-NEXT: [[TMP58:%.*]] = extractelement <64 x i8> [[TMP9]], i64 24
-; CHECK-NEXT: [[TMP59:%.*]] = and i8 [[TMP58]], 63
-; CHECK-NEXT: [[TMP175:%.*]] = or i8 [[TMP58]], [[TMP59]]
-; CHECK-NEXT: [[TMP60:%.*]] = extractelement <64 x i8> [[TMP9]], i64 25
-; CHECK-NEXT: [[TMP61:%.*]] = and i8 [[TMP60]], 63
-; CHECK-NEXT: [[TMP176:%.*]] = or i8 [[TMP60]], [[TMP61]]
-; CHECK-NEXT: [[TMP62:%.*]] = extractelement <64 x i8> [[TMP9]], i64 26
-; CHECK-NEXT: [[TMP63:%.*]] = and i8 [[TMP62]], 63
-; CHECK-NEXT: [[TMP178:%.*]] = or i8 [[TMP62]], [[TMP63]]
-; CHECK-NEXT: [[TMP64:%.*]] = extractelement <64 x i8> [[TMP9]], i64 27
-; CHECK-NEXT: [[TMP65:%.*]] = and i8 [[TMP64]], 63
-; CHECK-NEXT: [[TMP179:%.*]] = or i8 [[TMP64]], [[TMP65]]
-; CHECK-NEXT: [[TMP66:%.*]] = extractelement <64 x i8> [[TMP9]], i64 28
-; CHECK-NEXT: [[TMP67:%.*]] = and i8 [[TMP66]], 63
-; CHECK-NEXT: [[TMP181:%.*]] = or i8 [[TMP66]], [[TMP67]]
-; CHECK-NEXT: [[TMP68:%.*]] = extractelement <64 x i8> [[TMP9]], i64 29
-; CHECK-NEXT: [[TMP69:%.*]] = and i8 [[TMP68]], 63
-; CHECK-NEXT: [[TMP182:%.*]] = or i8 [[TMP68]], [[TMP69]]
-; CHECK-NEXT: [[TMP70:%.*]] = extractelement <64 x i8> [[TMP9]], i64 30
-; CHECK-NEXT: [[TMP71:%.*]] = and i8 [[TMP70]], 63
-; CHECK-NEXT: [[TMP184:%.*]] = or i8 [[TMP70]], [[TMP71]]
-; CHECK-NEXT: [[TMP72:%.*]] = extractelement <64 x i8> [[TMP9]], i64 31
-; CHECK-NEXT: [[TMP73:%.*]] = and i8 [[TMP72]], 63
-; CHECK-NEXT: [[TMP185:%.*]] = or i8 [[TMP72]], [[TMP73]]
-; CHECK-NEXT: [[TMP74:%.*]] = extractelement <64 x i8> [[TMP9]], i64 32
-; CHECK-NEXT: [[TMP75:%.*]] = and i8 [[TMP74]], 63
-; CHECK-NEXT: [[TMP187:%.*]] = or i8 [[TMP74]], [[TMP75]]
-; CHECK-NEXT: [[TMP76:%.*]] = extractelement <64 x i8> [[TMP9]], i64 33
-; CHECK-NEXT: [[TMP77:%.*]] = and i8 [[TMP76]], 63
-; CHECK-NEXT: [[TMP188:%.*]] = or i8 [[TMP76]], [[TMP77]]
-; CHECK-NEXT: [[TMP78:%.*]] = extractelement <64 x i8> [[TMP9]], i64 34
-; CHECK-NEXT: [[TMP79:%.*]] = and i8 [[TMP78]], 63
-; CHECK-NEXT: [[TMP190:%.*]] = or i8 [[TMP78]], [[TMP79]]
-; CHECK-NEXT: [[TMP80:%.*]] = extractelement <64 x i8> [[TMP9]], i64 35
-; CHECK-NEXT: [[TMP81:%.*]] = and i8 [[TMP80]], 63
-; CHECK-NEXT: [[TMP191:%.*]] = or i8 [[TMP80]], [[TMP81]]
-; CHECK-NEXT: [[TMP82:%.*]] = extractelement <64 x i8> [[TMP9]], i64 36
-; CHECK-NEXT: [[TMP83:%.*]] = and i8 [[TMP82]], 63
-; CHECK-NEXT: [[TMP193:%.*]] = or i8 [[TMP82]], [[TMP83]]
-; CHECK-NEXT: [[TMP84:%.*]] = extractelement <64 x i8> [[TMP9]], i64 37
-; CHECK-NEXT: [[TMP85:%.*]] = and i8 [[TMP84]], 63
-; CHECK-NEXT: [[TMP194:%.*]] = or i8 [[TMP84]], [[TMP85]]
-; CHECK-NEXT: [[TMP86:%.*]] = extractelement <64 x i8> [[TMP9]], i64 38
-; CHECK-NEXT: [[TMP87:%.*]] = and i8 [[TMP86]], 63
-; CHECK-NEXT: [[TMP196:%.*]] = or i8 [[TMP86]], [[TMP87]]
-; CHECK-NEXT: [[TMP88:%.*]] = extractelement <64 x i8> [[TMP9]], i64 39
-; CHECK-NEXT: [[TMP89:%.*]] = and i8 [[TMP88]], 63
-; CHECK-NEXT: [[TMP197:%.*]] = or i8 [[TMP88]], [[TMP89]]
-; CHECK-NEXT: [[TMP90:%.*]] = extractelement <64 x i8> [[TMP9]], i64 40
-; CHECK-NEXT: [[TMP91:%.*]] = and i8 [[TMP90]], 63
-; CHECK-NEXT: [[TMP199:%.*]] = or i8 [[TMP90]], [[TMP91]]
-; CHECK-NEXT: [[TMP92:%.*]] = extractelement <64 x i8> [[TMP9]], i64 41
-; CHECK-NEXT: [[TMP93:%.*]] = and i8 [[TMP92]], 63
-; CHECK-NEXT: [[TMP201:%.*]] = or i8 [[TMP92]], [[TMP93]]
-; CHECK-NEXT: [[TMP94:%.*]] = extractelement <64 x i8> [[TMP9]], i64 42
-; CHECK-NEXT: [[TMP95:%.*]] = and i8 [[TMP94]], 63
-; CHECK-NEXT: [[TMP138:%.*]] = or i8 [[TMP94]], [[TMP95]]
-; CHECK-NEXT: [[TMP96:%.*]] = extractelement <64 x i8> [[TMP9]], i64 43
-; CHECK-NEXT: [[TMP97:%.*]] = and i8 [[TMP96]], 63
-; CHECK-NEXT: [[TMP141:%.*]] = or i8 [[TMP96]], [[TMP97]]
-; CHECK-NEXT: [[TMP98:%.*]] = extractelement <64 x i8> [[TMP9]], i64 44
-; CHECK-NEXT: [[TMP99:%.*]] = and i8 [[TMP98]], 63
-; CHECK-NEXT: [[TMP144:%.*]] = or i8 [[TMP98]], [[TMP99]]
-; CHECK-NEXT: [[TMP100:%.*]] = extractelement <64 x i8> [[TMP9]], i64 45
-; CHECK-NEXT: [[TMP101:%.*]] = and i8 [[TMP100]], 63
-; CHECK-NEXT: [[TMP147:%.*]] = or i8 [[TMP100]], [[TMP101]]
-; CHECK-NEXT: [[TMP102:%.*]] = extractelement <64 x i8> [[TMP9]], i64 46
-; CHECK-NEXT: [[TMP103:%.*]] = and i8 [[TMP102]], 63
-; CHECK-NEXT: [[TMP150:%.*]] = or i8 [[TMP102]], [[TMP103]]
-; CHECK-NEXT: [[TMP104:%.*]] = extractelement <64 x i8> [[TMP9]], i64 47
-; CHECK-NEXT: [[TMP105:%.*]] = and i8 [[TMP104]], 63
-; CHECK-NEXT: [[TMP153:%.*]] = or i8 [[TMP104]], [[TMP105]]
-; CHECK-NEXT: [[TMP106:%.*]] = extractelement <64 x i8> [[TMP9]], i64 48
-; CHECK-NEXT: [[TMP107:%.*]] = and i8 [[TMP106]], 63
-; CHECK-NEXT: [[TMP156:%.*]] = or i8 [[TMP106]], [[TMP107]]
-; CHECK-NEXT: [[TMP108:%.*]] = extractelement <64 x i8> [[TMP9]], i64 49
-; CHECK-NEXT: [[TMP109:%.*]] = and i8 [[TMP108]], 63
-; CHECK-NEXT: [[TMP159:%.*]] = or i8 [[TMP108]], [[TMP109]]
-; CHECK-NEXT: [[TMP110:%.*]] = extractelement <64 x i8> [[TMP9]], i64 50
-; CHECK-NEXT: [[TMP111:%.*]] = and i8 [[TMP110]], 63
-; CHECK-NEXT: [[TMP162:%.*]] = or i8 [[TMP110]], [[TMP111]]
-; CHECK-NEXT: [[TMP112:%.*]] = extractelement <64 x i8> [[TMP9]], i64 51
-; CHECK-NEXT: [[TMP113:%.*]] = and i8 [[TMP112]], 63
-; CHECK-NEXT: [[TMP165:%.*]] = or i8 [[TMP112]], [[TMP113]]
-; CHECK-NEXT: [[TMP114:%.*]] = extractelement <64 x i8> [[TMP9]], i64 52
-; CHECK-NEXT: [[TMP115:%.*]] = and i8 [[TMP114]], 63
-; CHECK-NEXT: [[TMP168:%.*]] = or i8 [[TMP114]], [[TMP115]]
-; CHECK-NEXT: [[TMP116:%.*]] = extractelement <64 x i8> [[TMP9]], i64 53
-; CHECK-NEXT: [[TMP117:%.*]] = and i8 [[TMP116]], 63
-; CHECK-NEXT: [[TMP171:%.*]] = or i8 [[TMP116]], [[TMP117]]
-; CHECK-NEXT: [[TMP118:%.*]] = extractelement <64 x i8> [[TMP9]], i64 54
-; CHECK-NEXT: [[TMP119:%.*]] = and i8 [[TMP118]], 63
-; CHECK-NEXT: [[TMP174:%.*]] = or i8 [[TMP118]], [[TMP119]]
-; CHECK-NEXT: [[TMP120:%.*]] = extractelement <64 x i8> [[TMP9]], i64 55
-; CHECK-NEXT: [[TMP121:%.*]] = and i8 [[TMP120]], 63
-; CHECK-NEXT: [[TMP177:%.*]] = or i8 [[TMP120]], [[TMP121]]
-; CHECK-NEXT: [[TMP122:%.*]] = extractelement <64 x i8> [[TMP9]], i64 56
-; CHECK-NEXT: [[TMP123:%.*]] = and i8 [[TMP122]], 63
-; CHECK-NEXT: [[TMP180:%.*]] = or i8 [[TMP122]], [[TMP123]]
-; CHECK-NEXT: [[TMP124:%.*]] = extractelement <64 x i8> [[TMP9]], i64 57
-; CHECK-NEXT: [[TMP125:%.*]] = and i8 [[TMP124]], 63
-; CHECK-NEXT: [[TMP183:%.*]] = or i8 [[TMP124]], [[TMP125]]
-; CHECK-NEXT: [[TMP126:%.*]] = extractelement <64 x i8> [[TMP9]], i64 58
-; CHECK-NEXT: [[TMP127:%.*]] = and i8 [[TMP126]], 63
-; CHECK-NEXT: [[TMP186:%.*]] = or i8 [[TMP126]], [[TMP127]]
-; CHECK-NEXT: [[TMP128:%.*]] = extractelement <64 x i8> [[TMP9]], i64 59
-; CHECK-NEXT: [[TMP129:%.*]] = and i8 [[TMP128]], 63
-; CHECK-NEXT: [[TMP189:%.*]] = or i8 [[TMP128]], [[TMP129]]
-; CHECK-NEXT: [[TMP130:%.*]] = extractelement <64 x i8> [[TMP9]], i64 60
-; CHECK-NEXT: [[TMP131:%.*]] = and i8 [[TMP130]], 63
-; CHECK-NEXT: [[TMP192:%.*]] = or i8 [[TMP130]], [[TMP131]]
-; CHECK-NEXT: [[TMP132:%.*]] = extractelement <64 x i8> [[TMP9]], i64 61
-; CHECK-NEXT: [[TMP133:%.*]] = and i8 [[TMP132]], 63
-; CHECK-NEXT: [[TMP195:%.*]] = or i8 [[TMP132]], [[TMP133]]
-; CHECK-NEXT: [[TMP134:%.*]] = extractelement <64 x i8> [[TMP9]], i64 62
-; CHECK-NEXT: [[TMP135:%.*]] = and i8 [[TMP134]], 63
-; CHECK-NEXT: [[TMP198:%.*]] = or i8 [[TMP134]], [[TMP135]]
-; CHECK-NEXT: [[TMP136:%.*]] = extractelement <64 x i8> [[TMP9]], i64 63
-; CHECK-NEXT: [[TMP200:%.*]] = and i8 [[TMP136]], 63
-; CHECK-NEXT: [[TMP137:%.*]] = or i8 [[TMP136]], [[TMP200]]
+; CHECK-NEXT: [[TMP10:%.*]] = trunc <64 x i8> [[T]] to <64 x i6>
; CHECK-NEXT: [[_MSPROP1:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[TMP6]], <64 x i8> [[T]], <64 x i8> [[TMP3]])
-; CHECK-NEXT: [[_MSCMP124:%.*]] = icmp ne i8 [[TMP137]], 0
-; CHECK-NEXT: br i1 [[_MSCMP124]], label %[[BB203:.*]], label %[[BB204:.*]], !prof [[PROF1]]
-; CHECK: [[BB203]]:
+; CHECK-NEXT: [[TMP12:%.*]] = bitcast <64 x i6> [[TMP10]] to i384
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i384 [[TMP12]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label %[[BB13:.*]], label %[[BB14:.*]], !prof [[PROF1]]
+; CHECK: [[BB13]]:
; CHECK-NEXT: call void @__msan_warning_noreturn() #[[ATTR4]]
; CHECK-NEXT: unreachable
-; CHECK: [[BB204]]:
+; CHECK: [[BB14]]:
; CHECK-NEXT: [[R:%.*]] = call <64 x i8> @llvm.x86.avx512.vpermi2var.qi.512(<64 x i8> [[X0]], <64 x i8> [[T]], <64 x i8> [[X1]])
; CHECK-NEXT: store <64 x i8> [[_MSPROP1]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <64 x i8> [[R]]
diff --git a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
index 294f2cae7f7ab..34cf24c7208b7 100644
--- a/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
+++ b/llvm/test/Instrumentation/MemorySanitizer/i386/avx-intrinsics-i386.ll
@@ -987,24 +987,19 @@ declare <8 x float> @llvm.x86.avx.rsqrt.ps.256(<8 x float>) nounwind readnone
define <2 x double> @test_x86_avx_vpermilvar_pd(<2 x double> %a0, <2 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd(
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP4]], 1
-; CHECK-NEXT: [[TMP11:%.*]] = or i64 [[TMP4]], [[TMP10]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 1
-; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP7]], [[TMP8]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <2 x i64> [[A1:%.*]] to <2 x i1>
; CHECK-NEXT: [[A0:%.*]] = bitcast <2 x i64> [[TMP1]] to <2 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A0]], <2 x i64> [[A1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <2 x double> [[RES]] to <2 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP9]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP13:%.*]], label [[TMP14:%.*]], !prof [[PROF1]]
-; CHECK: 13:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <2 x i1> [[TMP3]] to i2
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i2 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 14:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> [[A2:%.*]], <2 x i64> [[A1]])
; CHECK-NEXT: store <2 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <2 x double> [[RES1]]
@@ -1018,30 +1013,19 @@ declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>) nounwi
define <4 x double> @test_x86_avx_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_pd_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i64>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i64>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i64> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i64 [[TMP4]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i64 [[TMP4]], [[TMP16]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i64> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i64 [[TMP7]], 3
-; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i64> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i64 [[TMP10]], 3
-; CHECK-NEXT: [[TMP12:%.*]] = or i64 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i64> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i64 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i64> [[A1:%.*]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i64> [[TMP1]] to <4 x double>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A0]], <4 x i64> [[A1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x double> [[RES]] to <4 x i64>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i64 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
-; CHECK: 19:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 20:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> [[A2:%.*]], <4 x i64> [[A1]])
; CHECK-NEXT: store <4 x i64> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x double> [[RES1]]
@@ -1070,30 +1054,19 @@ define <4 x double> @test_x86_avx_vpermilvar_pd_256_2(<4 x double> %a0) #0 {
define <4 x float> @test_x86_avx_vpermilvar_ps(<4 x float> %a0, <4 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps(
; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <4 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 16) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP4]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP4]], [[TMP16]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 3
-; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 3
-; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 3
-; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP13]], [[TMP14]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP1]] to <4 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP15]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP19:%.*]], label [[TMP20:%.*]], !prof [[PROF1]]
-; CHECK: 19:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i2> [[TMP3]] to i8
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 20:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A2:%.*]], <4 x i32> [[A1]])
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1118,27 +1091,17 @@ define <4 x float> @test_x86_avx_vpermilvar_ps_load(<4 x float> %a0, ptr %a1) #0
; CHECK-NEXT: [[TMP6:%.*]] = and i64 [[TMP5]], -2147483649
; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr
; CHECK-NEXT: [[_MSLD:%.*]] = load <4 x i32>, ptr [[TMP7]], align 16
-; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 0
-; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP9]], 3
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP9]], [[TMP10]]
-; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 1
-; CHECK-NEXT: [[TMP13:%.*]] = and i32 [[TMP12]], 3
-; CHECK-NEXT: [[TMP14:%.*]] = or i32 [[TMP12]], [[TMP13]]
-; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 2
-; CHECK-NEXT: [[TMP16:%.*]] = and i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = or i32 [[TMP15]], [[TMP16]]
-; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i32> [[_MSLD]], i64 3
-; CHECK-NEXT: [[TMP19:%.*]] = and i32 [[TMP18]], 3
-; CHECK-NEXT: [[TMP20:%.*]] = or i32 [[TMP18]], [[TMP19]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i32> [[A2]] to <4 x i2>
; CHECK-NEXT: [[A0:%.*]] = bitcast <4 x i32> [[TMP2]] to <4 x float>
; CHECK-NEXT: [[RES:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A0]], <4 x i32> [[A2]])
; CHECK-NEXT: [[TMP11:%.*]] = bitcast <4 x float> [[RES]] to <4 x i32>
-; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i32 [[TMP20]], 0
-; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP24:%.*]], label [[TMP25:%.*]], !prof [[PROF1]]
-; CHECK: 24:
+; CHECK-NEXT: [[TMP13:%.*]] = bitcast <4 x i2> [[TMP9]] to i8
+; CHECK-NEXT: [[_MSCMP1:%.*]] = icmp ne i8 [[TMP13]], 0
+; CHECK-NEXT: br i1 [[_MSCMP1]], label [[TMP14:%.*]], label [[TMP15:%.*]], !prof [[PROF1]]
+; CHECK: 14:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 25:
+; CHECK: 15:
; CHECK-NEXT: [[RES1:%.*]] = call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> [[A3:%.*]], <4 x i32> [[A2]])
; CHECK-NEXT: store <4 x i32> [[TMP11]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <4 x float> [[RES1]]
@@ -1153,42 +1116,19 @@ declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>) nounwind
define <8 x float> @test_x86_avx_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) #0 {
; CHECK-LABEL: @test_x86_avx_vpermilvar_ps_256(
; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i32>, ptr @__msan_param_tls, align 8
-; CHECK-NEXT: [[TMP2:%.*]] = load <8 x i32>, ptr inttoptr (i64 add (i64 ptrtoint (ptr @__msan_param_tls to i64), i64 32) to ptr), align 8
; CHECK-NEXT: [[TMP5:%.*]] = load i64, ptr @__msan_va_arg_overflow_size_tls, align 8
; CHECK-NEXT: call void @llvm.donothing()
-; CHECK-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP2]], i64 0
-; CHECK-NEXT: [[TMP28:%.*]] = and i32 [[TMP4]], 7
-; CHECK-NEXT: [[TMP29:%.*]] = or i32 [[TMP4]], [[TMP28]]
-; CHECK-NEXT: [[TMP7:%.*]] = extractelement <8 x i32> [[TMP2]], i64 1
-; CHECK-NEXT: [[TMP8:%.*]] = and i32 [[TMP7]], 7
-; CHECK-NEXT: [[TMP9:%.*]] = or i32 [[TMP7]], [[TMP8]]
-; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i32> [[TMP2]], i64 2
-; CHECK-NEXT: [[TMP11:%.*]] = and i32 [[TMP10]], 7
-; CHECK-NEXT: [[TMP12:%.*]] = or i32 [[TMP10]], [[TMP11]]
-; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP2]], i64 3
-; CHECK-NEXT: [[TMP14:%.*]] = and i32 [[TMP13]], 7
-; CHECK-NEXT: [[TMP15:%.*]] = or i32 [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP16:%.*]] = extractelement <8 x i32> [[TMP2]], i64 4
-; CHECK-NEXT: [[TMP17:%.*]] = and i32 [[TMP16]], 7
-; CHECK-NEXT: [[TMP18:%.*]] = or i32 [[TMP16]], [[TMP17]]
-; CHECK-NEXT: [[TMP19:%.*]] = extractelement <8 x i32> [[TMP2]], i64 5
-; CHECK-NEXT: [[TMP20:%.*]] = and i32 [[TMP19]], 7
-; CHECK-NEXT: [[TMP21:%.*]] = or i32 [[TMP19]], [[TMP20]]
-; CHECK-NEXT: [[TMP22:%.*]] = extractelement <8 x i32> [[TMP2]], i64 6
-; CHECK-NEXT: [[TMP23:%.*]] = and i32 [[TMP22]], 7
-; CHECK-NEXT: [[TMP24:%.*]] = or i32 [[TMP22]], [[TMP23]]
-; CHECK-NEXT: [[TMP25:%.*]] = extractelement <8 x i32> [[TMP2]], i64 7
-; CHECK-NEXT: [[TMP26:%.*]] = and i32 [[TMP25]], 7
-; CHECK-NEXT: [[TMP27:%.*]] = or i32 [[TMP25]], [[TMP26]]
+; CHECK-NEXT: [[TMP3:%.*]] = trunc <8 x i32> [[A1:%.*]] to <8 x i3>
; CHECK-NEXT: [[A0:%.*]] = bitcast <8 x i32> [[TMP1]] to <8 x float>
-; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1:%.*]])
+; CHECK-NEXT: [[RES:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A0]], <8 x i32> [[A1]])
; CHECK-NEXT: [[TMP6:%.*]] = bitcast <8 x float> [[RES]] to <8 x i32>
-; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i32 [[TMP27]], 0
-; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP31:%.*]], label [[TMP32:%.*]], !prof [[PROF1]]
-; CHECK: 31:
+; CHECK-NEXT: [[TMP7:%.*]] = bitcast <8 x i3> [[TMP3]] to i24
+; CHECK-NEXT: [[_MSCMP:%.*]] = icmp ne i24 [[TMP7]], 0
+; CHECK-NEXT: br i1 [[_MSCMP]], label [[TMP8:%.*]], label [[TMP9:%.*]], !prof [[PROF1]]
+; CHECK: 8:
; CHECK-NEXT: call void @__msan_warning_noreturn()
; CHECK-NEXT: unreachable
-; CHECK: 32:
+; CHECK: 9:
; CHECK-NEXT: [[RES1:%.*]] = call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> [[A2:%.*]], <8 x i32> [[A1]])
; CHECK-NEXT: store <8 x i32> [[TMP6]], ptr @__msan_retval_tls, align 8
; CHECK-NEXT: ret <8 x float> [[RES1]]
More information about the llvm-commits
mailing list