[llvm] r318036 - [X86] test/testn intrinsics lowering to IR. llvm part.
Uriel Korach via llvm-commits
llvm-commits@lists.llvm.org
Mon Nov 13 04:51:18 PST 2017
Author: uriel.k
Date: Mon Nov 13 04:51:18 2017
New Revision: 318036
URL: http://llvm.org/viewvc/llvm-project?rev=318036&view=rev
Log:
[X86] test/testn intrinsics lowering to IR. llvm part.
Remove the builtins from LLVM and add AutoUpgrade support.
Also add fast-isel tests for the TEST and TESTN instructions.
Differential Revision: https://reviews.llvm.org/D38736
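
As context for the change (not part of the diff): calls to the removed intrinsics are rewritten by AutoUpgrade into plain IR that instruction selection can pattern-match. A minimal sketch of the masked dword TESTM case, following the fast-isel tests below (value names are illustrative):

  %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a, <16 x i32> %b, i16 %m)

is upgraded to roughly:

  %and  = and <16 x i32> %a, %b
  %cmp  = icmp ne <16 x i32> %and, zeroinitializer
  %mvec = bitcast i16 %m to <16 x i1>
  %sel  = and <16 x i1> %cmp, %mvec
  %res  = bitcast <16 x i1> %sel to i16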
Modified:
llvm/trunk/include/llvm/IR/IntrinsicsX86.td
llvm/trunk/lib/IR/AutoUpgrade.cpp
llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
Modified: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/IntrinsicsX86.td?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td (original)
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td Mon Nov 13 04:51:18 2017
@@ -1392,80 +1392,6 @@ let TargetPrefix = "x86" in { // All in
def int_x86_avx_ptestnzc_256 : GCCBuiltin<"__builtin_ia32_ptestnzc256">,
Intrinsic<[llvm_i32_ty], [llvm_v4i64_ty,
llvm_v4i64_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_d_512 : GCCBuiltin<"__builtin_ia32_ptestmd512">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty, llvm_v16i32_ty,
- llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_q_512 : GCCBuiltin<"__builtin_ia32_ptestmq512">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty, llvm_v8i64_ty,
- llvm_i8_ty], [IntrNoMem]>;
-
- def int_x86_avx512_ptestm_b_128 : GCCBuiltin<"__builtin_ia32_ptestmb128">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_b_256 : GCCBuiltin<"__builtin_ia32_ptestmb256">,
- Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_b_512 : GCCBuiltin<"__builtin_ia32_ptestmb512">,
- Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_d_128 : GCCBuiltin<"__builtin_ia32_ptestmd128">,
- Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_d_256 : GCCBuiltin<"__builtin_ia32_ptestmd256">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_q_128 : GCCBuiltin<"__builtin_ia32_ptestmq128">,
- Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_q_256 : GCCBuiltin<"__builtin_ia32_ptestmq256">,
- Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_w_128 : GCCBuiltin<"__builtin_ia32_ptestmw128">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_w_256 : GCCBuiltin<"__builtin_ia32_ptestmw256">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestm_w_512 : GCCBuiltin<"__builtin_ia32_ptestmw512">,
- Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
-
- def int_x86_avx512_ptestnm_b_128 : GCCBuiltin<"__builtin_ia32_ptestnmb128">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i8_ty,
- llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_b_256 : GCCBuiltin<"__builtin_ia32_ptestnmb256">,
- Intrinsic<[llvm_i32_ty], [llvm_v32i8_ty,
- llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_b_512 : GCCBuiltin<"__builtin_ia32_ptestnmb512">,
- Intrinsic<[llvm_i64_ty], [llvm_v64i8_ty,
- llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_d_128 : GCCBuiltin<"__builtin_ia32_ptestnmd128">,
- Intrinsic<[llvm_i8_ty], [llvm_v4i32_ty,
- llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_d_256 : GCCBuiltin<"__builtin_ia32_ptestnmd256">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i32_ty,
- llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_d_512 : GCCBuiltin<"__builtin_ia32_ptestnmd512">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i32_ty,
- llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_q_128 : GCCBuiltin<"__builtin_ia32_ptestnmq128">,
- Intrinsic<[llvm_i8_ty], [llvm_v2i64_ty,
- llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_q_256 : GCCBuiltin<"__builtin_ia32_ptestnmq256">,
- Intrinsic<[llvm_i8_ty], [llvm_v4i64_ty,
- llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_q_512 : GCCBuiltin<"__builtin_ia32_ptestnmq512">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i64_ty,
- llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_w_128 : GCCBuiltin<"__builtin_ia32_ptestnmw128">,
- Intrinsic<[llvm_i8_ty], [llvm_v8i16_ty,
- llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_w_256 : GCCBuiltin<"__builtin_ia32_ptestnmw256">,
- Intrinsic<[llvm_i16_ty], [llvm_v16i16_ty,
- llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
- def int_x86_avx512_ptestnm_w_512 : GCCBuiltin<"__builtin_ia32_ptestnmw512">,
- Intrinsic<[llvm_i32_ty], [llvm_v32i16_ty,
- llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
def int_x86_avx512_mask_fpclass_pd_128 :
GCCBuiltin<"__builtin_ia32_fpclasspd128_mask">,
Modified: llvm/trunk/lib/IR/AutoUpgrade.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/AutoUpgrade.cpp?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/lib/IR/AutoUpgrade.cpp (original)
+++ llvm/trunk/lib/IR/AutoUpgrade.cpp Mon Nov 13 04:51:18 2017
@@ -259,6 +259,8 @@ static bool ShouldUpgradeX86Intrinsic(Fu
Name.startswith("avx512.cvtmask2") || // Added in 5.0
(Name.startswith("xop.vpcom") && // Added in 3.2
F->arg_size() == 2) ||
+ Name.startswith("avx512.ptestm") || //Added in 6.0
+ Name.startswith("avx512.ptestnm") || //Added in 6.0
Name.startswith("sse2.pavg") || // Added in 6.0
Name.startswith("avx2.pavg") || // Added in 6.0
Name.startswith("avx512.mask.pavg")) // Added in 6.0
@@ -826,6 +828,26 @@ static Value *upgradeIntMinMax(IRBuilder
return Res;
}
+// Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
+static Value *ApplyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask,
+ unsigned NumElts) {
+ const auto *C = dyn_cast<Constant>(Mask);
+ if (!C || !C->isAllOnesValue())
+ Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
+
+ if (NumElts < 8) {
+ uint32_t Indices[8];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i;
+ for (unsigned i = NumElts; i != 8; ++i)
+ Indices[i] = NumElts + i % NumElts;
+ Vec = Builder.CreateShuffleVector(Vec,
+ Constant::getNullValue(Vec->getType()),
+ Indices);
+ }
+ return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
+}
+
static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallInst &CI,
unsigned CC, bool Signed) {
Value *Op0 = CI.getArgOperand(0);
@@ -851,22 +873,8 @@ static Value *upgradeMaskedCompare(IRBui
}
Value *Mask = CI.getArgOperand(CI.getNumArgOperands() - 1);
- const auto *C = dyn_cast<Constant>(Mask);
- if (!C || !C->isAllOnesValue())
- Cmp = Builder.CreateAnd(Cmp, getX86MaskVec(Builder, Mask, NumElts));
- if (NumElts < 8) {
- uint32_t Indices[8];
- for (unsigned i = 0; i != NumElts; ++i)
- Indices[i] = i;
- for (unsigned i = NumElts; i != 8; ++i)
- Indices[i] = NumElts + i % NumElts;
- Cmp = Builder.CreateShuffleVector(Cmp,
- Constant::getNullValue(Cmp->getType()),
- Indices);
- }
- return Builder.CreateBitCast(Cmp, IntegerType::get(CI.getContext(),
- std::max(NumElts, 8U)));
+ return ApplyX86MaskOn1BitsVec(Builder, Cmp, Mask, NumElts);
}
// Replace a masked intrinsic with an older unmasked intrinsic.
@@ -1038,7 +1046,20 @@ void llvm::UpgradeIntrinsicCall(CallInst
ExtTy->getPrimitiveSizeInBits();
Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
Rep = Builder.CreateVectorSplat(NumElts, Rep);
- } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
+ } else if (IsX86 && (Name.startswith("avx512.ptestm") ||
+ Name.startswith("avx512.ptestnm"))) {
+ Value *Op0 = CI->getArgOperand(0);
+ Value *Op1 = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+ Rep = Builder.CreateAnd(Op0, Op1);
+ llvm::Type *Ty = Op0->getType();
+ Value *Zero = llvm::Constant::getNullValue(Ty);
+ ICmpInst::Predicate Pred =
+ Name.startswith("avx512.ptestm") ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+ Rep = Builder.CreateICmp(Pred, Rep, Zero);
+ unsigned NumElts = Op0->getType()->getVectorNumElements();
+ Rep = ApplyX86MaskOn1BitsVec(Builder, Rep, Mask, NumElts);
+ } else if (IsX86 && (Name.startswith("avx512.mask.pbroadcast"))) {
unsigned NumElts =
CI->getArgOperand(1)->getType()->getVectorNumElements();
Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
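
For vectors with fewer than 8 elements, the new ApplyX86MaskOn1BitsVec helper pads the i1 vector with zeros before the final bitcast. A hypothetical sketch for the 128-bit qword case (NumElts == 2), assuming getX86MaskVec (not shown in this diff) narrows the i8 mask to <2 x i1>; names are illustrative:

  %and  = and <2 x i64> %a, %b
  %cmp  = icmp eq <2 x i64> %and, zeroinitializer   ; <2 x i1>
  %mask = bitcast i8 %m to <8 x i1>
  %mlo  = shufflevector <8 x i1> %mask, <8 x i1> %mask, <2 x i32> <i32 0, i32 1>
  %sel  = and <2 x i1> %cmp, %mlo
  ; Indices[i] = NumElts + i % NumElts pulls zeros from the null second operand:
  %pad  = shufflevector <2 x i1> %sel, <2 x i1> zeroinitializer,
                        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
  %res  = bitcast <8 x i1> %pad to i8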
Modified: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h (original)
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h Mon Nov 13 04:51:18 2017
@@ -1385,30 +1385,6 @@ static const IntrinsicData IntrinsicsWi
X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_b_128, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_b_256, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_b_512, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_d_128, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_d_256, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_d_512, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_q_128, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_q_256, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_q_512, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_w_128, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_w_256, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestm_w_512, CMP_MASK, X86ISD::TESTM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_b_128, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_b_256, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_b_512, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_d_128, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_d_256, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_d_512, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_q_128, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_q_256, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_q_512, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_w_128, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_w_256, CMP_MASK, X86ISD::TESTNM, 0),
- X86_INTRINSIC_DATA(avx512_ptestnm_w_512, CMP_MASK, X86ISD::TESTNM, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_128, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_256, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
X86_INTRINSIC_DATA(avx512_rcp14_pd_512, INTR_TYPE_1OP_MASK, X86ISD::RCP14, 0),
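
With the intrinsics removed, these CMP_MASK entries mapping to X86ISD::TESTM/TESTNM have no remaining users; selection now forms VPTESTM/VPTESTNM directly from the generic and + icmp pattern. A minimal sketch mirroring the fast-isel tests below, which should select vptestnmq on an AVX-512 target:

  define i8 @testn_q_sketch(<8 x i64> %a, <8 x i64> %b) {
    %and = and <8 x i64> %a, %b
    %cmp = icmp eq <8 x i64> %and, zeroinitializer
    %res = bitcast <8 x i1> %cmp to i8
    ret i8 %res
  }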
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll Mon Nov 13 04:51:18 2017
@@ -228,6 +228,164 @@ entry:
}
+define zeroext i16 @test_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
+ %1 = icmp eq <16 x i32> %0, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
+ %1 = icmp eq <16 x i32> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i8 @test_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
+ %1 = bitcast <8 x i1> %0 to i8
+ ret i8 %1
+}
+
+define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = and <8 x i1> %0, %1
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_test_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_test_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
+ %1 = icmp ne <16 x i32> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_test_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_test_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = icmp ne <8 x i64> %and1.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %2 = and <8 x i1> %0, %1
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
define <8 x i64> @test_mm512_mask_set1_epi32(<8 x i64> %__O, i16 zeroext %__M, i32 %__A) {
; X32-LABEL: test_mm512_mask_set1_epi32:
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Mon Nov 13 04:51:18 2017
@@ -3724,3 +3724,77 @@ define <8 x i64>@test_int_x86_avx512_mas
ret <8 x i64> %res2
}
+define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
+; CHECK-LABEL: test_vptestmq:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
+ %res2 = add i8 %res1, %res
+ ret i8 %res2
+}
+declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
+
+define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
+; CHECK-LABEL: test_vptestmd:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %k0, %ecx
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
+ %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
+ %res2 = add i16 %res1, %res
+ ret i16 %res2
+}
+declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
+
+declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)
+
+define i16 @test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k1, %ecx
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: addl %ecx, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: retq
+ %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
+ %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)
+
+define i8 @test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k1, %ecx
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: addb %cl, %al
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq
+ %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll Mon Nov 13 04:51:18 2017
@@ -766,42 +766,6 @@ define <8 x double> @test_vminpd(<8 x do
declare <8 x double> @llvm.x86.avx512.mask.min.pd.512(<8 x double>, <8 x double>,
<8 x double>, i8, i32)
-define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
-; CHECK-LABEL: test_vptestmq:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vptestmq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addb %cl, %al
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq
- %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
- %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
- %res2 = add i8 %res1, %res
- ret i8 %res2
-}
-declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
-
-define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
-; CHECK-LABEL: test_vptestmd:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vptestmd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq
- %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
- %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
- %res2 = add i16 %res1, %res
- ret i16 %res2
-}
-declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
-
define void @test_mask_store_ss(i8* %ptr, <4 x float> %data, i8 %mask) {
; CHECK-LABEL: test_mask_store_ss:
; CHECK: ## BB#0:
@@ -4064,47 +4028,6 @@ define <2 x double>@test_int_x86_avx512_
ret <2 x double> %res4
}
-declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)
-
-define i16 @test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vptestnmd %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addl %ecx, %eax
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq
- %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
- %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16-1)
- %res2 = add i16 %res, %res1
- ret i16 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)
-
-define i8 @test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vptestnmq %zmm1, %zmm0, %k0 {%k1}
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: addb %cl, %al
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq
- %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-
-
-
declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Mon Nov 13 04:51:18 2017
@@ -1904,5 +1904,1655 @@ define <8 x i64> @test_mm512_maskz_unpac
ret <8 x i64> %res2
}
+define i64 @test_mm512_test_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: vptestmb %zmm0, %zmm1, %k0
+; X32-NEXT: kmovq %k0, (%esp)
+; X32-NEXT: movl (%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmb %zmm0, %zmm1, %k0
+; X64-NEXT: kmovq %k0, %rax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
+ %1 = icmp ne <64 x i8> %0, zeroinitializer
+ %2 = bitcast <64 x i1> %1 to i64
+ ret i64 %2
+}
+
+define i64 @test_mm512_mask_test_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: andl $-64, %esp
+; X32-NEXT: subl $256, %esp # imm = 0x100
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill
+; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: kmovd %eax, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpbroadcastw %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpslld $24, %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $4, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpbroadcastd %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $5, %cl
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpsllq $40, %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $6, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $13, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $16, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $15, %dl
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $2, %bl
+; X32-NEXT: kmovd %ebx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $4, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $5, %dl
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $6, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $24, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
+; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $29, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
+; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
+; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: vpmovm2b %k1, %zmm7
+; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $4, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $5, %cl
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $6, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $13, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $16, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $15, %dl
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $2, %bl
+; X32-NEXT: kmovd %ebx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $4, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $5, %dl
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $6, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $24, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k0
+; X32-NEXT: vpmovb2m %zmm0, %k1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k1, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6
+; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm5
+; X32-NEXT: vpbroadcastd %xmm5, %xmm5
+; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
+; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $29, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm4
+; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2]
+; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
+; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm3
+; X32-NEXT: vpbroadcastw %xmm3, %xmm3
+; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kshiftlq $1, %k0, %k0
+; X32-NEXT: kshiftrq $1, %k0, %k0
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: korq %k1, %k0, %k1
+; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload
+; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload
+; X32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
+; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: leal -4(%ebp), %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovq %rdi, %k1
+; X64-NEXT: vptestmb %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovq %k0, %rax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
+ %1 = icmp ne <64 x i8> %0, zeroinitializer
+ %2 = bitcast i64 %__U to <64 x i1>
+ %3 = and <64 x i1> %1, %2
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
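+; The epi16 variants below follow the same and + icmp ne pattern with <32 x i16>
+; elements, which should select vptestmw; the masked forms route %__U through a
+; {%k1} write-mask on the compare.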
+define i32 @test_mm512_test_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmw %zmm0, %zmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmw %zmm0, %zmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
+ %1 = icmp ne <32 x i16> %0, zeroinitializer
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @test_mm512_mask_test_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestmw %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
+ %1 = icmp ne <32 x i16> %0, zeroinitializer
+ %2 = bitcast i32 %__U to <32 x i1>
+ %3 = and <32 x i1> %1, %2
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
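+; The testn variants flip the predicate to icmp eq against zero, so the same
+; and + compare pattern should select vptestnmb/vptestnmw instead.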
+define i64 @test_mm512_testn_epi8_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: andl $-8, %esp
+; X32-NEXT: subl $8, %esp
+; X32-NEXT: vptestnmb %zmm0, %zmm1, %k0
+; X32-NEXT: kmovq %k0, (%esp)
+; X32-NEXT: movl (%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl %ebp, %esp
+; X32-NEXT: popl %ebp
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmb %zmm0, %zmm1, %k0
+; X64-NEXT: kmovq %k0, %rax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
+ %1 = icmp eq <64 x i8> %0, zeroinitializer
+ %2 = bitcast <64 x i1> %1 to i64
+ ret i64 %2
+}
+
+define i64 @test_mm512_mask_testn_epi8_mask(i64 %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: .cfi_def_cfa_offset 8
+; X32-NEXT: .cfi_offset %ebp, -8
+; X32-NEXT: movl %esp, %ebp
+; X32-NEXT: .cfi_def_cfa_register %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: andl $-64, %esp
+; X32-NEXT: subl $256, %esp # imm = 0x100
+; X32-NEXT: .cfi_offset %ebx, -12
+; X32-NEXT: vmovaps %zmm1, {{[0-9]+}}(%esp) # 64-byte Spill
+; X32-NEXT: vmovaps %zmm0, {{[0-9]+}}(%esp) # 64-byte Spill
+; X32-NEXT: movl 8(%ebp), %eax
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: kmovd %eax, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpbroadcastw %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpslld $24, %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $4, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpbroadcastd %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $5, %cl
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpsllq $40, %xmm1, %xmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $6, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $13, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $16, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $15, %dl
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $2, %bl
+; X32-NEXT: kmovd %ebx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $4, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $5, %dl
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $6, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $24, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm5, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255]
+; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $29, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
+; X32-NEXT: vpblendvb %ymm3, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255]
+; X32-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm1
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm1[0,1,2,3],zmm0[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0]
+; X32-NEXT: vpblendvb %ymm7, %ymm1, %ymm0, %ymm0
+; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl 12(%ebp), %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: vpmovm2b %k1, %zmm7
+; X32-NEXT: vmovdqa {{.*#+}} ymm6 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm6, %ymm1, %ymm7, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $4, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $5, %cl
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $6, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: andb $2, %cl
+; X32-NEXT: shrb %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movb %ah, %cl
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $13, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $16, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllw $8, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $15, %dl
+; X32-NEXT: movl %edx, %ebx
+; X32-NEXT: shrb $2, %bl
+; X32-NEXT: kmovd %ebx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $3, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslld $24, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $4, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastd %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $5, %dl
+; X32-NEXT: andb $1, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $40, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $6, %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: shrb $7, %cl
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpsllq $56, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $24, %ecx
+; X32-NEXT: kmovd %ecx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpbroadcastq %xmm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: andb $2, %dl
+; X32-NEXT: shrb %dl
+; X32-NEXT: kmovd %edx, %k1
+; X32-NEXT: vpmovm2b %k1, %zmm0
+; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6]
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: andb $15, %cl
+; X32-NEXT: movl %ecx, %edx
+; X32-NEXT: shrb $2, %dl
+; X32-NEXT: kmovd %edx, %k0
+; X32-NEXT: vpmovb2m %zmm0, %k1
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: vpmovm2b %k1, %zmm1
+; X32-NEXT: vextracti64x4 $1, %zmm1, %ymm6
+; X32-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255]
+; X32-NEXT: vpblendvb %ymm7, %ymm6, %ymm0, %ymm0
+; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: shrb $3, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm1
+; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0,1,2,3,4]
+; X32-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm6
+; X32-NEXT: vpblendvb %ymm5, %ymm6, %ymm1, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $28, %ecx
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm5
+; X32-NEXT: vpbroadcastd %xmm5, %xmm5
+; X32-NEXT: vinserti128 $1, %xmm5, %ymm0, %ymm5
+; X32-NEXT: vpblendvb %ymm4, %ymm1, %ymm5, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $29, %ecx
+; X32-NEXT: andb $1, %cl
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm4
+; X32-NEXT: vpslldq {{.*#+}} xmm4 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm4[0,1,2]
+; X32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
+; X32-NEXT: vpblendvb %ymm3, %ymm1, %ymm4, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm0
+; X32-NEXT: vextracti64x4 $1, %zmm0, %ymm1
+; X32-NEXT: movl %eax, %ecx
+; X32-NEXT: shrl $30, %ecx
+; X32-NEXT: kmovd %ecx, %k0
+; X32-NEXT: vpmovm2b %k0, %zmm3
+; X32-NEXT: vpbroadcastw %xmm3, %xmm3
+; X32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
+; X32-NEXT: vpblendvb %ymm2, %ymm1, %ymm3, %ymm1
+; X32-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
+; X32-NEXT: vpmovb2m %zmm0, %k0
+; X32-NEXT: kshiftlq $1, %k0, %k0
+; X32-NEXT: kshiftrq $1, %k0, %k0
+; X32-NEXT: shrl $31, %eax
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: kshiftlq $63, %k1, %k1
+; X32-NEXT: korq %k1, %k0, %k1
+; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm0 # 64-byte Reload
+; X32-NEXT: vmovdqa64 {{[0-9]+}}(%esp), %zmm1 # 64-byte Reload
+; X32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
+; X32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: leal -4(%ebp), %esp
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovq %rdi, %k1
+; X64-NEXT: vptestnmb %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovq %k0, %rax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <64 x i8>
+ %1 = icmp eq <64 x i8> %0, zeroinitializer
+ %2 = bitcast i64 %__U to <64 x i1>
+ %3 = and <64 x i1> %1, %2
+ %4 = bitcast <64 x i1> %3 to i64
+ ret i64 %4
+}
+
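+; For the epi16 testn variants below, the result is only 32 bits wide, so the
+; X32 path needs no frame setup or bit-by-bit mask construction: the i32 mask
+; is read with a single kmovd from the stack.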
+define i32 @test_mm512_testn_epi16_mask(<8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmw %zmm0, %zmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmw %zmm0, %zmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
+ %1 = icmp eq <32 x i16> %0, zeroinitializer
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @test_mm512_mask_testn_epi16_mask(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
+; X32-LABEL: test_mm512_mask_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm512_mask_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestnmw %zmm0, %zmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and1.i.i = and <8 x i64> %__B, %__A
+ %0 = bitcast <8 x i64> %and1.i.i to <32 x i16>
+ %1 = icmp eq <32 x i16> %0, zeroinitializer
+ %2 = bitcast i32 %__U to <32 x i1>
+ %3 = and <32 x i1> %1, %2
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
!0 = !{i32 1}
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Mon Nov 13 04:51:18 2017
@@ -2,7 +2,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512BW
; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
-
declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
@@ -3795,3 +3794,135 @@ define <64 x i8>@test_int_x86_avx512_mas
ret <64 x i8> %res2
}
+declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
+
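+; These calls use the removed intrinsic signatures; AutoUpgrade is expected to
+; rewrite each one into plain IR, roughly (sketch of the unmasked byte case):
+;   %and = and <64 x i8> %x0, %x1
+;   %cmp = icmp ne <64 x i8> %and, zeroinitializer
+;   %res = bitcast <64 x i1> %cmp to i64
+; with the masked form and'ing %cmp against the bitcast of %x2 before the
+; final bitcast, matching the fast-isel tests above.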
+define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1}
+; AVX512BW-NEXT: kmovq %k1, %rcx
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $20, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
+; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k1 {%k1}
+; AVX512F-32-NEXT: kmovq %k1, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $20, %esp
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
+ %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
+ %res2 = add i64 %res, %res1
+ ret i64 %res2
+}
+
+declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
+
+define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1}
+; AVX512BW-NEXT: kmovd %k1, %ecx
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k1 {%k1}
+; AVX512F-32-NEXT: kmovd %k1, %ecx
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
+ %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
+ %res2 = add i32 %res, %res1
+ ret i32 %res2
+}
+
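+; The ptestnm upgrades take the same shape with icmp eq in place of icmp ne.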
+declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2)
+
+define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovq %rdi, %k1
+; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1}
+; AVX512BW-NEXT: kmovq %k1, %rcx
+; AVX512BW-NEXT: kmovq %k0, %rax
+; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: subl $20, %esp
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
+; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovq {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k1 {%k1}
+; AVX512F-32-NEXT: kmovq %k1, (%esp)
+; AVX512F-32-NEXT: movl (%esp), %eax
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
+; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
+; AVX512F-32-NEXT: adcl {{[0-9]+}}(%esp), %edx
+; AVX512F-32-NEXT: addl $20, %esp
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
+ %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
+ %res2 = add i64 %res, %res1
+ ret i64 %res2
+}
+
+declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
+
+define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1}
+; AVX512BW-NEXT: kmovd %k1, %ecx
+; AVX512BW-NEXT: kmovd %k0, %eax
+; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512:
+; AVX512F-32: # BB#0:
+; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0
+; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k1 {%k1}
+; AVX512F-32-NEXT: kmovd %k1, %ecx
+; AVX512F-32-NEXT: kmovd %k0, %eax
+; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: vzeroupper
+; AVX512F-32-NEXT: retl
+ %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
+ %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
+ %res2 = add i32 %res, %res1
+ ret i32 %res2
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics.ll Mon Nov 13 04:51:18 2017
@@ -1683,134 +1683,6 @@ define <32 x i16>@test_int_x86_avx512_ma
ret <32 x i16> %res4
}
-declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
-
-define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_ptestm_b_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: vptestmb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_b_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: subl $20, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
-; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovq %k0, (%esp)
-; AVX512F-32-NEXT: vptestmb %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; AVX512F-32-NEXT: movl (%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl $20, %esp
-; AVX512F-32-NEXT: retl
- %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
- %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
- %res2 = add i64 %res, %res1
- ret i64 %res2
-}
-
-declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
-
-define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_ptestm_w_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: vptestmw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_ptestm_w_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: vptestmw %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %ecx, %eax
-; AVX512F-32-NEXT: retl
- %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
- %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
- %res2 = add i32 %res, %res1
- ret i32 %res2
-}
-
-declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2)
-
-define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_b_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovq %rdi, %k1
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: vptestnmb %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_b_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: subl $20, %esp
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 24
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k0
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: kunpckdq %k0, %k1, %k1
-; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovq %k0, (%esp)
-; AVX512F-32-NEXT: vptestnmb %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
-; AVX512F-32-NEXT: movl (%esp), %eax
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
-; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl $20, %esp
-; AVX512F-32-NEXT: retl
- %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
- %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
- %res2 = add i64 %res, %res1
- ret i64 %res2
-}
-
-declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
-
-define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
-; AVX512BW-LABEL: test_int_x86_avx512_ptestnm_w_512:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: kmovd %edi, %k1
-; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1}
-; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: vptestnmw %zmm1, %zmm0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512F-32-LABEL: test_int_x86_avx512_ptestnm_w_512:
-; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
-; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0 {%k1}
-; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: vptestnmw %zmm1, %zmm0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %ecx, %eax
-; AVX512F-32-NEXT: retl
- %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
- %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
- %res2 = add i32 %res, %res1
- ret i32 %res2
-}
-
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; AVX512BW-LABEL: test_x86_avx512_psll_w_512:
; AVX512BW: ## BB#0:
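
The removed ptestm/ptestnm tests above now live in the -upgrade file, since
AutoUpgrade expands the old intrinsics into plain IR. As a minimal sketch
(placeholder names %a, %b, %mask; mirroring the fast-isel patterns added
below), the masked 512-bit word form becomes:

  %and = and <32 x i16> %a, %b
  %cmp = icmp ne <32 x i16> %and, zeroinitializer
  %m   = bitcast i32 %mask to <32 x i1>
  %sel = and <32 x i1> %cmp, %m
  %res = bitcast <32 x i1> %sel to i32
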
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-fast-isel.ll Mon Nov 13 04:51:18 2017
@@ -4,6 +4,400 @@
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
+define zeroext i16 @test_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmb %xmm0, %xmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmb %xmm0, %xmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
+ %1 = icmp ne <16 x i8> %0, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+define zeroext i16 @test_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestmb %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
+ %1 = icmp ne <16 x i8> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define i32 @test_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmb %ymm0, %ymm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmb %ymm0, %ymm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
+ %1 = icmp ne <32 x i8> %0, zeroinitializer
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @test_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_test_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_test_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestmb %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
+ %1 = icmp ne <32 x i8> %0, zeroinitializer
+ %2 = bitcast i32 %__U to <32 x i1>
+ %3 = and <32 x i1> %1, %2
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i8 @test_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmw %xmm0, %xmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmw %xmm0, %xmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
+ %1 = icmp ne <8 x i16> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestmw %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
+ %1 = icmp ne <8 x i16> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i16 @test_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmw %ymm0, %ymm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmw %ymm0, %ymm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
+ %1 = icmp ne <16 x i16> %0, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+define zeroext i16 @test_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_test_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_test_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestmw %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
+ %1 = icmp ne <16 x i16> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define zeroext i16 @test_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmb %xmm0, %xmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmb %xmm0, %xmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
+ %1 = icmp eq <16 x i8> %0, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+define zeroext i16 @test_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestnmb %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
+ %1 = icmp eq <16 x i8> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
+define i32 @test_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmb %ymm0, %ymm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmb %ymm0, %ymm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
+ %1 = icmp eq <32 x i8> %0, zeroinitializer
+ %2 = bitcast <32 x i1> %1 to i32
+ ret i32 %2
+}
+
+define i32 @test_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_testn_epi8_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovd {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_testn_epi8_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestnmb %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
+ %1 = icmp eq <32 x i8> %0, zeroinitializer
+ %2 = bitcast i32 %__U to <32 x i1>
+ %3 = and <32 x i1> %1, %2
+ %4 = bitcast <32 x i1> %3 to i32
+ ret i32 %4
+}
+
+define zeroext i8 @test_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmw %xmm0, %xmm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmw %xmm0, %xmm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
+ %1 = icmp eq <8 x i16> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovd %eax, %k1
+; X32-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestnmw %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
+ %1 = icmp eq <8 x i16> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i16 @test_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmw %ymm0, %ymm1, %k0
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmw %ymm0, %ymm1, %k0
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
+ %1 = icmp eq <16 x i16> %0, zeroinitializer
+ %2 = bitcast <16 x i1> %1 to i16
+ ret i16 %2
+}
+
+define zeroext i16 @test_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_testn_epi16_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
+; X32-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovd %k0, %eax
+; X32-NEXT: movzwl %ax, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_testn_epi16_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovd %edi, %k1
+; X64-NEXT: vptestnmw %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovd %k0, %eax
+; X64-NEXT: movzwl %ax, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
+ %1 = icmp eq <16 x i16> %0, zeroinitializer
+ %2 = bitcast i16 %__U to <16 x i1>
+ %3 = and <16 x i1> %1, %2
+ %4 = bitcast <16 x i1> %3 to i16
+ ret i16 %4
+}
+
define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
; X32-LABEL: test_mm_mask_set1_epi8:
; X32: # BB#0: # %entry
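
Note that the testn flavors in the file above differ from the test flavors
only in the compare predicate: a mask bit is set where the AND result is all
zeros, so the IR uses icmp eq rather than icmp ne. A minimal sketch for the
unmasked 128-bit byte case (placeholder names %a, %b), matching the
test_mm_testn_epi8_mask pattern above:

  %and = and <16 x i8> %a, %b
  %cmp = icmp eq <16 x i8> %and, zeroinitializer
  %res = bitcast <16 x i1> %cmp to i16
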
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll Mon Nov 13 04:51:18 2017
@@ -3672,3 +3672,157 @@ define <16 x i16>@test_int_x86_avx512_ma
declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
+
+define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
+ %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
+declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
+
+define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
+ %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
+ %res2 = add i32 %res, %res1
+ ret i32 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
+
+define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
+
+define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
+ %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
+declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
+
+define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
+ %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
+declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
+
+define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
+ %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
+ %res2 = add i32 %res, %res1
+ ret i32 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2)
+
+define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2)
+
+define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
+; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
+; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc9]
+; CHECK-NEXT: kmovd %k1, %ecx ## encoding: [0xc5,0xfb,0x93,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
+ %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
+ %res2 = add i16 %res, %res1
+ ret i16 %res2
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll Mon Nov 13 04:51:18 2017
@@ -2515,154 +2515,3 @@ define <16 x i16>@test_int_x86_avx512_ma
ret <16 x i16> %res4
}
-declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
-
-define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
- %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
- %res2 = add i16 %res, %res1
- ret i16 %res2
-}
-
-declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
-
-define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
- %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
- %res2 = add i32 %res, %res1
- ret i32 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
-
-define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
-
-define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
- %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
- %res2 = add i16 %res, %res1
- ret i16 %res2
-}
-
-declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
-
-define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
- %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
- %res2 = add i16 %res, %res1
- ret i16 %res2
-}
-
-declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
-
-define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
- %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
- %res2 = add i32 %res, %res1
- ret i32 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2)
-
-define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2)
-
-define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
-; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: addl %ecx, %eax ## encoding: [0x01,0xc8]
-; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
- %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
- %res2 = add i16 %res, %res1
- ret i16 %res2
-}
-
-
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-fast-isel.ll Mon Nov 13 04:51:18 2017
@@ -233,6 +233,424 @@ entry:
ret <4 x i64> %1
}
+define zeroext i8 @test_mm_test_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_test_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmd %xmm0, %xmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_test_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmd %xmm0, %xmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp ne <4 x i32> %0, zeroinitializer
+ %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+define zeroext i8 @test_mm_mask_test_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_test_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_test_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmd %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp ne <4 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = and <4 x i1> %1, %extract.i
+ %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+define zeroext i8 @test_mm256_test_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_test_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmd %ymm0, %ymm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_test_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmd %ymm0, %ymm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp ne <8 x i32> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm256_mask_test_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_test_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_test_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmd %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp ne <8 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_mm_test_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_test_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmq %xmm0, %xmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_test_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmq %xmm0, %xmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_mask_test_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_test_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_test_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmq %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp ne <2 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = and <2 x i1> %0, %extract.i
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_mm256_test_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_test_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestmq %ymm0, %ymm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_test_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestmq %ymm0, %ymm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm256_mask_test_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_test_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_test_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestmq %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp ne <4 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = and <4 x i1> %0, %extract.i
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_mm_testn_epi32_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmd %xmm0, %xmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmd %xmm0, %xmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = shufflevector <4 x i1> %1, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %3 = bitcast <8 x i1> %2 to i8
+ ret i8 %3
+}
+
+define zeroext i8 @test_mm_mask_testn_epi32_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmd %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = bitcast <2 x i64> %and.i.i to <4 x i32>
+ %1 = icmp eq <4 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %3 = and <4 x i1> %1, %extract.i
+ %4 = shufflevector <4 x i1> %3, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %5 = bitcast <8 x i1> %4 to i8
+ ret i8 %5
+}
+
+define zeroext i8 @test_mm256_testn_epi32_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmd %ymm0, %ymm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmd %ymm0, %ymm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp eq <8 x i32> %0, zeroinitializer
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm256_mask_testn_epi32_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_testn_epi32_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_testn_epi32_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmd %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = bitcast <4 x i64> %and.i.i to <8 x i32>
+ %1 = icmp eq <8 x i32> %0, zeroinitializer
+ %2 = bitcast i8 %__U to <8 x i1>
+ %3 = and <8 x i1> %1, %2
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_mm_testn_epi64_mask(<2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmq %xmm0, %xmm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmq %xmm0, %xmm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <2 x i1> %0, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm_mask_testn_epi64_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
+; X32-LABEL: test_mm_mask_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm_mask_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmq %xmm0, %xmm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <2 x i64> %__B, %__A
+ %0 = icmp eq <2 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+ %2 = and <2 x i1> %0, %extract.i
+ %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
+define zeroext i8 @test_mm256_testn_epi64_mask(<4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: vptestnmq %ymm0, %ymm1, %k0
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: vptestnmq %ymm0, %ymm1, %k0
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer
+ %1 = shufflevector <4 x i1> %0, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %2 = bitcast <8 x i1> %1 to i8
+ ret i8 %2
+}
+
+define zeroext i8 @test_mm256_mask_testn_epi64_mask(i8 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
+; X32-LABEL: test_mm256_mask_testn_epi64_mask:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: kmovw %eax, %k1
+; X32-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1}
+; X32-NEXT: kmovw %k0, %eax
+; X32-NEXT: movzbl %al, %eax
+; X32-NEXT: vzeroupper
+; X32-NEXT: retl
+;
+; X64-LABEL: test_mm256_mask_testn_epi64_mask:
+; X64: # BB#0: # %entry
+; X64-NEXT: kmovw %edi, %k1
+; X64-NEXT: vptestnmq %ymm0, %ymm1, %k0 {%k1}
+; X64-NEXT: kmovw %k0, %eax
+; X64-NEXT: movzbl %al, %eax
+; X64-NEXT: vzeroupper
+; X64-NEXT: retq
+entry:
+ %and.i.i = and <4 x i64> %__B, %__A
+ %0 = icmp eq <4 x i64> %and.i.i, zeroinitializer
+ %1 = bitcast i8 %__U to <8 x i1>
+ %extract.i = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %2 = and <4 x i1> %0, %extract.i
+ %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %4 = bitcast <8 x i1> %3 to i8
+ ret i8 %4
+}
+
define <2 x i64> @test_mm_mask_set1_epi32(<2 x i64> %__O, i8 zeroext %__M) {
; X32-LABEL: test_mm_mask_set1_epi32:
; X32: # BB#0: # %entry
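
For the d/q element types the compare can yield fewer than eight bits, so the
<4 x i1> (or <2 x i1>) result is widened with a shufflevector that pads zero
lanes before the bitcast to i8. A minimal sketch for the unmasked 128-bit
dword case (placeholder names %a, %b), matching the test_mm_test_epi32_mask
pattern above:

  %and = and <4 x i32> %a, %b
  %cmp = icmp ne <4 x i32> %and, zeroinitializer
  %pad = shufflevector <4 x i1> %cmp, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = bitcast <8 x i1> %pad to i8
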
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Mon Nov 13 04:51:18 2017
@@ -5992,3 +5992,155 @@ define <8 x i32>@test_int_x86_avx512_mas
ret <8 x i32> %res2
}
+declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8)
+
+define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)
+
+define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)
+
+define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)
+
+define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)
+
+define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)
+
+define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)
+
+define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
+declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)
+
+define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
+; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
+; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
+; CHECK-NEXT: kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
+; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq ## encoding: [0xc3]
+ %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
+ %res2 = add i8 %res, %res1
+ ret i8 %res2
+}
+
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll?rev=318036&r1=318035&r2=318036&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll Mon Nov 13 04:51:18 2017
@@ -3859,160 +3859,6 @@ define <8 x float>@test_int_x86_avx512_m
ret <8 x float> %res4
}
-declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>,i8)
-
-define i8@test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_d_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)
-
-define i8@test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_d_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)
-
-define i8@test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_q_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)
-
-define i8@test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestm_q_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8 %x2)
-
-define i8@test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestnmd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8 %x2)
-
-define i8@test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_d_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestnmd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8 %x2)
-
-define i8@test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_128:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestnmq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8 %x2)
-
-define i8@test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
-; CHECK-LABEL: test_int_x86_avx512_ptestnm_q_256:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vptestnmq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
-; CHECK-NEXT: addb %cl, %al ## encoding: [0x00,0xc8]
-; CHECK-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
-; CHECK-NEXT: retq ## encoding: [0xc3]
- %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
- %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8-1)
- %res2 = add i8 %res, %res1
- ret i8 %res2
-}
-
-
-
define <2 x i64> @test_x86_avx512_psra_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_q_128:
; CHECK: ## BB#0:
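For reference, the TEST/TESTN intrinsic calls deleted above do not simply disappear: per the commit log, AutoUpgrade rewrites calls to the removed intrinsics into generic IR that the backend pattern-matches back to VPTESTM/VPTESTNM. Below is a minimal sketch of that pattern for the 128-bit dword TEST case; the function name is hypothetical, and the exact instruction sequence AutoUpgrade emits may differ in detail. The TESTN variants use icmp eq in place of icmp ne.

; Sketch of the generic IR replacing llvm.x86.avx512.ptestm.d.128
; (hypothetical function name, not taken from the patch).
define i8 @upgraded_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
  ; TEST semantics: per element, is any bit of (%x0 & %x1) set?
  %and = and <4 x i32> %x0, %x1
  %test = icmp ne <4 x i32> %and, zeroinitializer
  ; apply the low 4 bits of the i8 mask argument
  %mask.v8 = bitcast i8 %mask to <8 x i1>
  %mask.v4 = shufflevector <8 x i1> %mask.v8, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %masked = and <4 x i1> %test, %mask.v4
  ; widen the 4-bit result back to 8 bits, zeroing the high lanes
  %wide = shufflevector <4 x i1> %masked, <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res = bitcast <8 x i1> %wide to i8
  ret i8 %res
}

The fast-isel tests added in this commit are intended to check that and/icmp sequences of this shape still select to the vptestm/vptestnm instructions.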