[llvm] r324661 - [X86] Add DAG combine to constant fold a bitcast of a vXi1 constant build_vector into a scalar integer.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 8 14:26:37 PST 2018
Author: ctopper
Date: Thu Feb 8 14:26:36 2018
New Revision: 324661
URL: http://llvm.org/viewvc/llvm-project?rev=324661&view=rev
Log:
[X86] Add DAG combine to constant fold a bitcast of a vXi1 constant build_vector into a scalar integer.
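Schematically (hand-written here, not an actual SelectionDAG dump), the new
combine rewrites

  (i8 (bitcast (v8i1 build_vector 1, 0, 1, 1, 0, 0, 0, 1)))
    --> (i8 constant 0x8d)

where element i of the build_vector supplies bit i of the scalar and undef
elements contribute 0. In particular, all-ones and all-zeros vXi1 masks now
become plain immediates instead of being materialized in a k-register
(kxnorw/kxorw + kmovw), which is what the test diffs below show.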
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Thu Feb 8 14:26:36 2018
@@ -30627,6 +30627,20 @@ static SDValue combineBitcast(SDNode *N,
}
}
+ // Try to remove a bitcast of a constant vXi1 vector. We have to legalize
+ // most of these to scalar anyway.
+ if (Subtarget.hasAVX512() && VT.isScalarInteger() &&
+ SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
+ ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
+ APInt Imm(SrcVT.getVectorNumElements(), 0);
+ for (unsigned Idx = 0, e = N0.getNumOperands(); Idx < e; ++Idx) {
+ SDValue In = N0.getOperand(Idx);
+ if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1))
+ Imm.setBit(Idx);
+ }
+ return DAG.getConstant(Imm, SDLoc(N), VT);
+ }
+
// Try to remove bitcasts from input and output of mask arithmetic to
// remove GPR<->K-register crossings.
if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget))
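To reproduce the effect in isolation (a minimal example reusing the mask8 test
body from avx512-mask-op.ll below; the triple and feature string here are
assumed, any AVX-512-capable target works):

  $ cat mask8.ll
  define i8 @mask8(i8 %x) {
    %m0 = bitcast i8 %x to <8 x i1>
    %m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
    %ret = bitcast <8 x i1> %m1 to i8
    ret i8 %ret
  }
  $ llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < mask8.ll

With this patch the all-ones xor operand folds to an i8 -1 constant before it
is ever legalized into a mask register, so the function compiles to notb/movl
rather than kxnorw + kmovw + xorb.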
Modified: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll Thu Feb 8 14:26:36 2018
@@ -3164,9 +3164,8 @@ define <8 x i16> @test_cmp_d_512(<16 x i
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kxnorw %k0, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3194,21 +3193,19 @@ define <8 x i16> @test_mask_cmp_d_512(<1
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: vpcmpgtd %zmm0, %zmm1, %k2 {%k1}
; CHECK-NEXT: vpcmpled %zmm1, %zmm0, %k3 {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k4
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k5 {%k1}
-; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k6 {%k1}
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpled %zmm0, %zmm1, %k5 {%k1}
; CHECK-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k2, %eax
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k3, %eax
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
+; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
@@ -3257,9 +3254,8 @@ define <8 x i16> @test_ucmp_d_512(<16 x
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kxnorw %k0, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: retq
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3287,21 +3283,19 @@ define <8 x i16> @test_mask_ucmp_d_512(<
; CHECK-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: vpcmpltud %zmm1, %zmm0, %k2 {%k1}
; CHECK-NEXT: vpcmpleud %zmm1, %zmm0, %k3 {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k4
-; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k5 {%k1}
-; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT: vpcmpneqd %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpnltud %zmm1, %zmm0, %k5 {%k1}
; CHECK-NEXT: vpcmpnleud %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k2, %eax
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vmovd %ecx, %xmm0
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k3, %eax
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
+; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k1, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
@@ -3350,8 +3344,7 @@ define <8 x i8> @test_cmp_q_512(<8 x i64
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kxnorw %k0, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: movl $255, %eax
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -3380,25 +3373,23 @@ define <8 x i8> @test_mask_cmp_q_512(<8
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: vpcmpgtq %zmm0, %zmm1, %k2 {%k1}
; CHECK-NEXT: vpcmpleq %zmm1, %zmm0, %k3 {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k4
-; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k5 {%k1}
-; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k6 {%k1}
+; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpleq %zmm0, %zmm1, %k5 {%k1}
; CHECK-NEXT: vpcmpgtq %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k2, %eax
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vmovd %ecx, %xmm0
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k3, %eax
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k1, %eax
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -3443,8 +3434,7 @@ define <8 x i8> @test_ucmp_q_512(<8 x i6
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kxnorw %k0, %k0, %k0
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: movl $255, %eax
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
@@ -3473,25 +3463,23 @@ define <8 x i8> @test_mask_ucmp_q_512(<8
; CHECK-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; CHECK-NEXT: vpcmpltuq %zmm1, %zmm0, %k2 {%k1}
; CHECK-NEXT: vpcmpleuq %zmm1, %zmm0, %k3 {%k1}
-; CHECK-NEXT: kxorw %k0, %k0, %k4
-; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k5 {%k1}
-; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k6 {%k1}
+; CHECK-NEXT: vpcmpneqq %zmm1, %zmm0, %k4 {%k1}
+; CHECK-NEXT: vpcmpnltuq %zmm1, %zmm0, %k5 {%k1}
; CHECK-NEXT: vpcmpnleuq %zmm1, %zmm0, %k1 {%k1}
+; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k2, %eax
-; CHECK-NEXT: kmovw %k0, %ecx
-; CHECK-NEXT: vmovd %ecx, %xmm0
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k3, %eax
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k4, %eax
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k5, %eax
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
-; CHECK-NEXT: kmovw %k6, %eax
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
; CHECK-NEXT: kmovw %k1, %eax
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0
; CHECK-NEXT: retq
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Thu Feb 8 14:26:36 2018
@@ -31,37 +31,11 @@ define i32 @mask16_zext(i16 %x) {
}
define i8 @mask8(i8 %x) {
-; KNL-LABEL: mask8:
-; KNL: ## %bb.0:
-; KNL-NEXT: kxnorw %k0, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: xorb %dil, %al
-; KNL-NEXT: ## kill: def $al killed $al killed $eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: mask8:
-; SKX: ## %bb.0:
-; SKX-NEXT: kxnorw %k0, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: xorb %dil, %al
-; SKX-NEXT: ## kill: def $al killed $al killed $eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: mask8:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: xorb %dil, %al
-; AVX512BW-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: mask8:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: xorb %dil, %al
-; AVX512DQ-NEXT: ## kill: def $al killed $al killed $eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: mask8:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: notb %dil
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%ret = bitcast <8 x i1> %m1 to i8
@@ -69,37 +43,11 @@ define i8 @mask8(i8 %x) {
}
define i32 @mask8_zext(i8 %x) {
-; KNL-LABEL: mask8_zext:
-; KNL: ## %bb.0:
-; KNL-NEXT: kxnorw %k0, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: xorb %dil, %al
-; KNL-NEXT: movzbl %al, %eax
-; KNL-NEXT: retq
-;
-; SKX-LABEL: mask8_zext:
-; SKX: ## %bb.0:
-; SKX-NEXT: kxnorw %k0, %k0, %k0
-; SKX-NEXT: kmovd %k0, %eax
-; SKX-NEXT: xorb %dil, %al
-; SKX-NEXT: movzbl %al, %eax
-; SKX-NEXT: retq
-;
-; AVX512BW-LABEL: mask8_zext:
-; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: xorb %dil, %al
-; AVX512BW-NEXT: movzbl %al, %eax
-; AVX512BW-NEXT: retq
-;
-; AVX512DQ-LABEL: mask8_zext:
-; AVX512DQ: ## %bb.0:
-; AVX512DQ-NEXT: kxnorw %k0, %k0, %k0
-; AVX512DQ-NEXT: kmovw %k0, %eax
-; AVX512DQ-NEXT: xorb %dil, %al
-; AVX512DQ-NEXT: movzbl %al, %eax
-; AVX512DQ-NEXT: retq
+; CHECK-LABEL: mask8_zext:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: notb %dil
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
%m2 = bitcast <8 x i1> %m1 to i8
@@ -125,9 +73,7 @@ define void @mask16_mem(i16* %ptr) {
define void @mask8_mem(i8* %ptr) {
; KNL-LABEL: mask8_mem:
; KNL: ## %bb.0:
-; KNL-NEXT: kxnorw %k0, %k0, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: xorb %al, (%rdi)
+; KNL-NEXT: notb (%rdi)
; KNL-NEXT: retq
;
; SKX-LABEL: mask8_mem:
@@ -139,9 +85,7 @@ define void @mask8_mem(i8* %ptr) {
;
; AVX512BW-LABEL: mask8_mem:
; AVX512BW: ## %bb.0:
-; AVX512BW-NEXT: kxnorw %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: xorb %al, (%rdi)
+; AVX512BW-NEXT: notb (%rdi)
; AVX512BW-NEXT: retq
;
; AVX512DQ-LABEL: mask8_mem:
Modified: llvm/trunk/test/CodeGen/X86/avx512-schedule.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-schedule.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-schedule.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-schedule.ll Thu Feb 8 14:26:36 2018
@@ -6700,18 +6700,14 @@ define i32 @mask16_zext(i16 %x) {
define i8 @mask8(i8 %x) {
; GENERIC-LABEL: mask8:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: xorb %dil, %al # sched: [1:0.33]
-; GENERIC-NEXT: # kill: def $al killed $al killed $eax
+; GENERIC-NEXT: notb %dil # sched: [1:0.33]
+; GENERIC-NEXT: movl %edi, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8:
; SKX: # %bb.0:
-; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: xorb %dil, %al # sched: [1:0.25]
-; SKX-NEXT: # kill: def $al killed $al killed $eax
+; SKX-NEXT: notb %dil # sched: [1:0.25]
+; SKX-NEXT: movl %edi, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
@@ -6722,18 +6718,14 @@ define i8 @mask8(i8 %x) {
define i32 @mask8_zext(i8 %x) {
; GENERIC-LABEL: mask8_zext:
; GENERIC: # %bb.0:
-; GENERIC-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; GENERIC-NEXT: kmovd %k0, %eax # sched: [1:0.33]
-; GENERIC-NEXT: xorb %dil, %al # sched: [1:0.33]
-; GENERIC-NEXT: movzbl %al, %eax # sched: [1:0.33]
+; GENERIC-NEXT: notb %dil # sched: [1:0.33]
+; GENERIC-NEXT: movzbl %dil, %eax # sched: [1:0.33]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; SKX-LABEL: mask8_zext:
; SKX: # %bb.0:
-; SKX-NEXT: kxnorw %k0, %k0, %k0 # sched: [1:1.00]
-; SKX-NEXT: kmovd %k0, %eax # sched: [3:1.00]
-; SKX-NEXT: xorb %dil, %al # sched: [1:0.25]
-; SKX-NEXT: movzbl %al, %eax # sched: [1:0.25]
+; SKX-NEXT: notb %dil # sched: [1:0.25]
+; SKX-NEXT: movzbl %dil, %eax # sched: [1:0.25]
; SKX-NEXT: retq # sched: [7:1.00]
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Thu Feb 8 14:26:36 2018
@@ -1693,10 +1693,7 @@ define i64 @test_cmp_b_512(<64 x i8> %a0
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpgtb %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: addq %rax, %rcx
-; AVX512BW-NEXT: kxnorq %k0, %k0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: leaq -1(%rcx,%rax), %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -1742,10 +1739,8 @@ define i64 @test_cmp_b_512(<64 x i8> %a0
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edi, %eax
; AVX512F-32-NEXT: adcl %ecx, %edx
-; AVX512F-32-NEXT: kxnord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: addl %ecx, %eax
-; AVX512F-32-NEXT: adcl %ecx, %edx
+; AVX512F-32-NEXT: addl $-1, %eax
+; AVX512F-32-NEXT: adcl $-1, %edx
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: vzeroupper
@@ -1904,10 +1899,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a
; AVX512BW-NEXT: addq %rcx, %rax
; AVX512BW-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovq %k0, %rcx
-; AVX512BW-NEXT: addq %rax, %rcx
-; AVX512BW-NEXT: kxnorq %k0, %k0, %k0
-; AVX512BW-NEXT: kmovq %k0, %rax
-; AVX512BW-NEXT: addq %rcx, %rax
+; AVX512BW-NEXT: leaq -1(%rcx,%rax), %rax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -1953,10 +1945,8 @@ define i64 @test_ucmp_b_512(<64 x i8> %a
; AVX512F-32-NEXT: kmovd %k0, %eax
; AVX512F-32-NEXT: addl %edi, %eax
; AVX512F-32-NEXT: adcl %ecx, %edx
-; AVX512F-32-NEXT: kxnord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: addl %ecx, %eax
-; AVX512F-32-NEXT: adcl %ecx, %edx
+; AVX512F-32-NEXT: addl $-1, %eax
+; AVX512F-32-NEXT: adcl $-1, %edx
; AVX512F-32-NEXT: popl %esi
; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: vzeroupper
@@ -2115,10 +2105,7 @@ define i32 @test_cmp_w_512(<32 x i16> %a
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: addl %eax, %ecx
-; AVX512BW-NEXT: kxnord %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: leal -1(%rcx,%rax), %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -2140,10 +2127,7 @@ define i32 @test_cmp_w_512(<32 x i16> %a
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpgtw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: addl %eax, %ecx
-; AVX512F-32-NEXT: kxnord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: leal -1(%ecx,%eax), %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
@@ -2256,10 +2240,7 @@ define i32 @test_ucmp_w_512(<32 x i16> %
; AVX512BW-NEXT: addl %ecx, %eax
; AVX512BW-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kmovd %k0, %ecx
-; AVX512BW-NEXT: addl %eax, %ecx
-; AVX512BW-NEXT: kxnord %k0, %k0, %k0
-; AVX512BW-NEXT: kmovd %k0, %eax
-; AVX512BW-NEXT: addl %ecx, %eax
+; AVX512BW-NEXT: leal -1(%rcx,%rax), %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
@@ -2281,10 +2262,7 @@ define i32 @test_ucmp_w_512(<32 x i16> %
; AVX512F-32-NEXT: addl %ecx, %eax
; AVX512F-32-NEXT: vpcmpnleuw %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovd %k0, %ecx
-; AVX512F-32-NEXT: addl %eax, %ecx
-; AVX512F-32-NEXT: kxnord %k0, %k0, %k0
-; AVX512F-32-NEXT: kmovd %k0, %eax
-; AVX512F-32-NEXT: addl %ecx, %eax
+; AVX512F-32-NEXT: leal -1(%ecx,%eax), %eax
; AVX512F-32-NEXT: vzeroupper
; AVX512F-32-NEXT: retl
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mask-op.ll Thu Feb 8 14:26:36 2018
@@ -4,9 +4,8 @@
define i32 @mask32(i32 %x) {
; CHECK-LABEL: mask32:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kxnord %k0, %k0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: xorl %edi, %eax
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i32 %x to <32 x i1>
%m1 = xor <32 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
@@ -20,9 +19,8 @@ define i32 @mask32(i32 %x) {
define i64 @mask64(i64 %x) {
; CHECK-LABEL: mask64:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kxnorq %k0, %k0, %k0
-; CHECK-NEXT: kmovq %k0, %rax
-; CHECK-NEXT: xorq %rdi, %rax
+; CHECK-NEXT: notq %rdi
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: retq
%m0 = bitcast i64 %x to <64 x i1>
%m1 = xor <64 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1,
Modified: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll Thu Feb 8 14:26:36 2018
@@ -2804,9 +2804,9 @@ define <8 x i32> @test_cmp_b_256(<32 x i
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
-; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
@@ -2842,25 +2842,26 @@ define <8 x i32> @test_mask_cmp_b_256(<3
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
-; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
+; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
-; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
-; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
-; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
+; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x3f,0xc0,0x02]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
-; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
+; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
+; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
-; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
-; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
-; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
-; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
+; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
+; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
+; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
+; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT: vmovd %r9d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
+; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
+; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@@ -2902,9 +2903,9 @@ define <8 x i32> @test_ucmp_b_256(<32 x
; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
; CHECK-NEXT: vpinsrd $1, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
; CHECK-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
-; CHECK-NEXT: kxnord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendd $8, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2],xmm1[3]
; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
@@ -2940,25 +2941,26 @@ define <8 x i32> @test_mask_ucmp_b_256(<
; CHECK-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
; CHECK-NEXT: kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
; CHECK-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
-; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
+; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
; CHECK-NEXT: vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
-; CHECK-NEXT: kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
-; CHECK-NEXT: kxord %k0, %k0, %k0 ## encoding: [0xc4,0xe1,0xfd,0x47,0xc0]
-; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
+; CHECK-NEXT: kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
; CHECK-NEXT: vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
; CHECK-NEXT: vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
; CHECK-NEXT: vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
; CHECK-NEXT: kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
-; CHECK-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
-; CHECK-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x01]
+; CHECK-NEXT: vmovd %esi, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
+; CHECK-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
; CHECK-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
; CHECK-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
-; CHECK-NEXT: vmovd %r8d, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xc8]
-; CHECK-NEXT: vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
-; CHECK-NEXT: vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
-; CHECK-NEXT: vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
+; CHECK-NEXT: vmovd %ecx, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
+; CHECK-NEXT: vmovd %r8d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
+; CHECK-NEXT: vpunpckldq %xmm1, %xmm2, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
+; CHECK-NEXT: ## xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
+; CHECK-NEXT: vmovd %r9d, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
+; CHECK-NEXT: vpunpcklqdq %xmm2, %xmm1, %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
+; CHECK-NEXT: ## xmm1 = xmm1[0],xmm2[0]
; CHECK-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
@@ -3004,9 +3006,9 @@ define <8 x i16> @test_cmp_w_256(<16 x i
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
@@ -3035,21 +3037,19 @@ define <8 x i16> @test_mask_cmp_w_256(<1
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: vpcmpgtw %ymm0, %ymm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
; CHECK-NEXT: vpcmplew %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x3f,0xf0,0x02]
+; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmplew %ymm0, %ymm1, %k5 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x3f,0xe8,0x02]
; CHECK-NEXT: vpcmpgtw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
-; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
+; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
@@ -3099,9 +3099,9 @@ define <8 x i16> @test_ucmp_w_256(<16 x
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
@@ -3130,21 +3130,19 @@ define <8 x i16> @test_mask_ucmp_w_256(<
; CHECK-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
; CHECK-NEXT: vpcmpltuw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
; CHECK-NEXT: vpcmpleuw %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf1,0x05]
+; CHECK-NEXT: vpcmpneqw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
; CHECK-NEXT: vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
-; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
+; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
@@ -3194,9 +3192,9 @@ define <8 x i16> @test_cmp_b_128(<16 x i
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3224,21 +3222,19 @@ define <8 x i16> @test_mask_cmp_b_128(<1
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; CHECK-NEXT: vpcmpgtb %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
; CHECK-NEXT: vpcmpleb %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x3f,0xf0,0x02]
+; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmpleb %xmm0, %xmm1, %k5 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x3f,0xe8,0x02]
; CHECK-NEXT: vpcmpgtb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
-; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
+; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
@@ -3287,9 +3283,9 @@ define <8 x i16> @test_ucmp_b_128(<16 x
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
-; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
+; CHECK-NEXT: vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
%vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
@@ -3317,21 +3313,19 @@ define <8 x i16> @test_mask_ucmp_b_128(<
; CHECK-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
; CHECK-NEXT: vpcmpltub %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
; CHECK-NEXT: vpcmpleub %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf1,0x05]
+; CHECK-NEXT: vpcmpneqb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltub %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
; CHECK-NEXT: vpcmpnleub %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03]
-; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
+; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
@@ -3380,8 +3374,7 @@ define <8 x i8> @test_cmp_w_128(<8 x i16
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
@@ -3410,25 +3403,23 @@ define <8 x i8> @test_mask_cmp_w_128(<8
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; CHECK-NEXT: vpcmpgtw %xmm0, %xmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0]
; CHECK-NEXT: vpcmplew %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x3f,0xf0,0x02]
+; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmplew %xmm0, %xmm1, %k5 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x3f,0xe8,0x02]
; CHECK-NEXT: vpcmpgtw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -3473,8 +3464,7 @@ define <8 x i8> @test_ucmp_w_128(<8 x i1
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
@@ -3503,25 +3493,23 @@ define <8 x i8> @test_mask_ucmp_w_128(<8
; CHECK-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
; CHECK-NEXT: vpcmpltuw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
; CHECK-NEXT: vpcmpleuw %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf1,0x05]
+; CHECK-NEXT: vpcmpneqw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05]
; CHECK-NEXT: vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
+; CHECK-NEXT: kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovd %k2, %eax ## encoding: [0xc5,0xfb,0x93,0xc2]
-; CHECK-NEXT: kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovd %k3, %eax ## encoding: [0xc5,0xfb,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovd %k4, %eax ## encoding: [0xc5,0xfb,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovd %k5, %eax ## encoding: [0xc5,0xfb,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovd %k6, %eax ## encoding: [0xc5,0xfb,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovd %k1, %eax ## encoding: [0xc5,0xfb,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
Modified: llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512dq-mask-op.ll Thu Feb 8 14:26:36 2018
@@ -4,10 +4,8 @@
define i8 @mask8(i8 %x) {
; CHECK-LABEL: mask8:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kxnorw %k0, %k0, %k0
-; CHECK-NEXT: kmovd %k0, %eax
-; CHECK-NEXT: xorb %dil, %al
-; CHECK-NEXT: ## kill: def $al killed $al killed $eax
+; CHECK-NEXT: notb %dil
+; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: retq
%m0 = bitcast i8 %x to <8 x i1>
%m1 = xor <8 x i1> %m0, <i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1, i1 -1>
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll?rev=324661&r1=324660&r2=324661&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics-upgrade.ll Thu Feb 8 14:26:36 2018
@@ -5124,8 +5124,7 @@ define <8 x i8> @test_cmp_d_256(<8 x i32
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -5154,25 +5153,23 @@ define <8 x i8> @test_mask_cmp_d_256(<8
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: vpcmpgtd %ymm0, %ymm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
; CHECK-NEXT: vpcmpled %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k6 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x1f,0xf0,0x02]
+; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpled %ymm0, %ymm1, %k5 {%k1} ## encoding: [0x62,0xf3,0x75,0x29,0x1f,0xe8,0x02]
; CHECK-NEXT: vpcmpgtd %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5217,8 +5214,7 @@ define <8 x i8> @test_ucmp_d_256(<8 x i3
; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
-; CHECK-NEXT: kxnorw %k0, %k0, %k0 ## encoding: [0xc5,0xfc,0x46,0xc0]
-; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
@@ -5247,25 +5243,23 @@ define <8 x i8> @test_mask_ucmp_d_256(<8
; CHECK-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT: vpcmpltud %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
; CHECK-NEXT: vpcmpleud %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xf1,0x05]
+; CHECK-NEXT: vpcmpneqd %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltud %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
; CHECK-NEXT: vpcmpnleud %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
-; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: vpinsrb $14, %edi, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc7,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
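(Side note on the vpinsrb -> vpinsrw change visible in these hunks: <8 x i8> is promoted to <8 x i16> during type legalization, so element i of the result lives at byte offset 2*i. Inserting each compare result with vpinsrw at word lane i is therefore equivalent to vpinsrb at byte 2*i, and the VEX vpinsrw encoding is one byte shorter, as the encodings above show: 5 bytes versus 6.)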
@@ -5335,32 +5329,30 @@ define <8 x i8> @test_cmp_q_256(<4 x i64
define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x37,0xd8]
-; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x29,0x1f,0xf0,0x02]
-; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xf9]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
+; CHECK-NEXT: vpcmpgtq %ymm0, %ymm1, %k1 {%k2} ## encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
+; CHECK-NEXT: vpcmpleq %ymm1, %ymm0, %k3 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpleq %ymm0, %ymm1, %k5 {%k2} ## encoding: [0x62,0xf3,0xf5,0x2a,0x1f,0xe8,0x02]
+; CHECK-NEXT: vpcmpgtq %ymm1, %ymm0, %k6 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
+; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5430,32 +5422,30 @@ define <8 x i8> @test_ucmp_q_256(<4 x i6
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_256:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
-; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd9,0x01]
-; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf1,0x05]
-; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k2} ## encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
+; CHECK-NEXT: vpcmpltuq %ymm1, %ymm0, %k1 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
+; CHECK-NEXT: vpcmpleuq %ymm1, %ymm0, %k3 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqq %ymm1, %ymm0, %k4 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
+; CHECK-NEXT: vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} ## encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
+; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5525,32 +5515,30 @@ define <8 x i8> @test_cmp_d_128(<4 x i32
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k3 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x66,0xd8]
-; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0x75,0x09,0x1f,0xf0,0x02]
-; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xf9]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} ## encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
+; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %k1 {%k2} ## encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
+; CHECK-NEXT: vpcmpled %xmm1, %xmm0, %k3 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpled %xmm0, %xmm1, %k5 {%k2} ## encoding: [0x62,0xf3,0x75,0x0a,0x1f,0xe8,0x02]
+; CHECK-NEXT: vpcmpgtd %xmm1, %xmm0, %k6 {%k2} ## encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
+; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5620,32 +5608,30 @@ define <8 x i8> @test_ucmp_d_128(<4 x i3
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_d_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
-; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd9,0x01]
-; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf1,0x05]
-; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0c]
-; CHECK-NEXT: kshiftrw $12, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0c]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k2} ## encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
+; CHECK-NEXT: vpcmpltud %xmm1, %xmm0, %k1 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
+; CHECK-NEXT: vpcmpleud %xmm1, %xmm0, %k3 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqd %xmm1, %xmm0, %k4 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltud %xmm1, %xmm0, %k5 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
+; CHECK-NEXT: vpcmpnleud %xmm1, %xmm0, %k6 {%k2} ## encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
+; CHECK-NEXT: kshiftlw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
+; CHECK-NEXT: kshiftrw $12, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5715,32 +5701,30 @@ define <8 x i8> @test_cmp_q_128(<2 x i64
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_cmp_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
-; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x37,0xd8]
-; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k6 {%k1} ## encoding: [0x62,0xf3,0xf5,0x09,0x1f,0xf0,0x02]
-; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xf9]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} ## encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
+; CHECK-NEXT: vpcmpgtq %xmm0, %xmm1, %k1 {%k2} ## encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
+; CHECK-NEXT: vpcmpleq %xmm1, %xmm0, %k3 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpleq %xmm0, %xmm1, %k5 {%k2} ## encoding: [0x62,0xf3,0xf5,0x0a,0x1f,0xe8,0x02]
+; CHECK-NEXT: vpcmpgtq %xmm1, %xmm0, %k6 {%k2} ## encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
+; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
+; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
@@ -5810,32 +5794,30 @@ define <8 x i8> @test_ucmp_q_128(<2 x i6
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; CHECK-LABEL: test_mask_ucmp_q_128:
; CHECK: ## %bb.0:
-; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
-; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
-; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd9,0x01]
-; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xd1,0x02]
-; CHECK-NEXT: kxorw %k0, %k0, %k4 ## encoding: [0xc5,0xfc,0x47,0xe0]
-; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1f,0xe9,0x04]
-; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf1,0x05]
-; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x1e,0xf9,0x06]
-; CHECK-NEXT: kshiftlw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x0e]
-; CHECK-NEXT: kshiftrw $14, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc9,0x0e]
-; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
-; CHECK-NEXT: kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
-; CHECK-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
-; CHECK-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02]
-; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
-; CHECK-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04]
+; CHECK-NEXT: kmovw %edi, %k2 ## encoding: [0xc5,0xf8,0x92,0xd7]
+; CHECK-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k2} ## encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
+; CHECK-NEXT: vpcmpltuq %xmm1, %xmm0, %k1 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
+; CHECK-NEXT: vpcmpleuq %xmm1, %xmm0, %k3 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
+; CHECK-NEXT: vpcmpneqq %xmm1, %xmm0, %k4 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
+; CHECK-NEXT: vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
+; CHECK-NEXT: vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} ## encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
+; CHECK-NEXT: kshiftlw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
+; CHECK-NEXT: kshiftrw $14, %k2, %k2 ## encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
+; CHECK-NEXT: kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
+; CHECK-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
+; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
+; CHECK-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
+; CHECK-NEXT: kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
+; CHECK-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT: kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
-; CHECK-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06]
+; CHECK-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT: kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
-; CHECK-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08]
+; CHECK-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT: kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
-; CHECK-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a]
-; CHECK-NEXT: kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
-; CHECK-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c]
-; CHECK-NEXT: kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
-; CHECK-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e]
+; CHECK-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
+; CHECK-NEXT: kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
+; CHECK-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT: retq ## encoding: [0xc3]
%res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
%vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
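(To illustrate the combine this commit adds on a non-trivial constant, here is a minimal hypothetical example, not one of the tests above, with a mixed constant mask:

define i8 @fold_const_mask() {
  %r = bitcast <8 x i1> <i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false> to i8
  ret i8 %r
}

Each true element sets the bit at its index, so on an AVX-512 target this should now fold to the scalar constant 5 (bits 0 and 2 set) rather than being built up in a k-register.)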