[llvm] r314023 - [x86] swap order of srl (and X, C1), C2 when it saves size
Sanjay Patel via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 22 12:37:22 PDT 2017
Author: spatel
Date: Fri Sep 22 12:37:21 2017
New Revision: 314023
URL: http://llvm.org/viewvc/llvm-project?rev=314023&view=rev
Log:
[x86] swap order of srl (and X, C1), C2 when it saves size
The (non-)obvious win comes from saving 3 bytes by using the 0x83 'and' opcode variant
(sign-extended 8-bit immediate) instead of 0x81 (32-bit immediate). There are also bigger
wins based on known bits that allow us to eliminate the mask entirely.
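For example (taken directly from the test diffs below), the pair

  andl $61440, %ecx  # imm = 0xF000 --> 0x81 opcode, 4-byte immediate
  shrl $12, %ecx

becomes

  shrl $12, %ecx
  andl $15, %ecx     # fits a sign-extended byte --> 0x83 opcode, 1-byte immediate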
As noted in the code comments, this could be extended. There are potentially other wins
from always shifting first, but doing that reveals a tangle of problems in other pattern
matching. We do this transform generically in instcombine, but we often have icmp IR that
doesn't match that pattern, so we must account for it in the backend.
Differential Revision: https://reviews.llvm.org/D38181
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
llvm/trunk/test/CodeGen/X86/known-bits.ll
llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll
llvm/trunk/test/CodeGen/X86/test-shrink.ll
llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll
llvm/trunk/test/CodeGen/X86/xor-icmp.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 22 12:37:21 2017
@@ -31762,6 +31762,40 @@ static SDValue combineShiftRightAlgebrai
return SDValue();
}
+static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+
+ // Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
+ // TODO: This is a generic DAG combine that became an x86-only combine to
+ // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and
+ // and-not ('andn').
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
+ auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ if (!ShiftC || !AndC)
+ return SDValue();
+
+ // If the 'and' mask is already smaller than a byte, then don't bother.
+ // If the new 'and' mask would be bigger than a byte, then don't bother.
+ // If the mask fits in a byte, then we know we can generate smaller and
+ // potentially better code by shifting first.
+ // TODO: Always try to shrink a mask that is over 32-bits?
+ APInt MaskVal = AndC->getAPIntValue();
+ APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
+ if (MaskVal.getMinSignedBits() <= 8 || NewMaskVal.getMinSignedBits() > 8)
+ return SDValue();
+
+ // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
+ SDLoc DL(N);
+ SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
+ SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
+}
+
/// \brief Returns a vector of 0s if the node in input is a vector logical
/// shift by a constant amount which is known to be bigger than or equal
/// to the vector element size in bits.
@@ -31804,6 +31838,10 @@ static SDValue combineShift(SDNode* N, S
if (SDValue V = combineShiftRightAlgebraic(N, DAG))
return V;
+ if (N->getOpcode() == ISD::SRL)
+ if (SDValue V = combineShiftRightLogical(N, DAG))
+ return V;
+
// Try to fold this logical shift into a zero vector.
if (N->getOpcode() != ISD::SRA)
if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))
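For reference, here is a minimal standalone sketch of the size heuristic used by
combineShiftRightLogical above, assuming LLVM's APInt API (the helper name is
hypothetical):

  #include "llvm/ADT/APInt.h"
  using llvm::APInt;

  // True when 'srl (and X, Mask), Shift' is better emitted as
  // 'and (srl X, Shift), Mask >> Shift': the original mask needs more than
  // a signed byte, but the shifted mask fits in one.
  static bool shiftFirstShrinksMask(const APInt &MaskVal, const APInt &ShiftVal) {
    APInt NewMaskVal = MaskVal.lshr(ShiftVal);
    return MaskVal.getMinSignedBits() > 8 && NewMaskVal.getMinSignedBits() <= 8;
  }

Worked example: Mask = 0xF000 needs 17 signed bits, but after a shift of 12 the mask
is 0xF (5 signed bits), so shifting first shrinks the immediate encoding.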
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Fri Sep 22 12:37:21 2017
@@ -129,8 +129,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm1, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $61440, %ecx # imm = 0xF000
; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpbroadcastd %xmm1, %xmm1
@@ -151,8 +151,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm1, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $49152, %ecx # imm = 0xC000
; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpbroadcastw %xmm1, %xmm1
@@ -162,8 +162,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm1, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $32768, %ecx # imm = 0x8000
; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm1
; X32-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
@@ -483,8 +483,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $61440, %ecx # imm = 0xF000
; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastd %xmm0, %xmm0
@@ -507,8 +507,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $49152, %ecx # imm = 0xC000
; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -519,8 +519,8 @@ define <8 x i64> @test_mm512_mask_set1_e
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $32768, %ecx # imm = 0x8000
; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
@@ -860,8 +860,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $61440, %ecx # imm = 0xF000
; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastd %xmm0, %xmm0
@@ -882,8 +882,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $49152, %ecx # imm = 0xC000
; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -893,8 +893,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $32768, %ecx # imm = 0x8000
; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
@@ -1214,8 +1214,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $61440, %ecx # imm = 0xF000
; X32-NEXT: shrl $12, %ecx
+; X32-NEXT: andl $15, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastd %xmm0, %xmm0
@@ -1238,8 +1238,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $49152, %ecx # imm = 0xC000
; X32-NEXT: shrl $14, %ecx
+; X32-NEXT: andl $3, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpbroadcastw %xmm0, %xmm0
@@ -1250,8 +1250,8 @@ define <8 x i64> @test_mm512_maskz_set1_
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0
; X32-NEXT: vpmovb2m %zmm0, %k0
; X32-NEXT: movl %eax, %ecx
-; X32-NEXT: andl $32768, %ecx # imm = 0x8000
; X32-NEXT: shrl $15, %ecx
+; X32-NEXT: andl $1, %ecx
; X32-NEXT: kmovd %ecx, %k1
; X32-NEXT: vpmovm2b %k1, %zmm0
; X32-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
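A side note on the next file's diff: there, several of the same and+shr pairs end up
matched to BEXTR instead, e.g.:

  movl $1036, %edi          # BEXTR control 0x040C: length 4 in bits [15:8], start 12 in bits [7:0]
  bextrl %edi, %ecx, %eax   # eax = (ecx >> 12) & 0xf

The control-word layout shown in the comments is the standard BMI1 BEXTR encoding.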
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Fri Sep 22 12:37:21 2017
@@ -1734,19 +1734,29 @@ define i64 @test_mask_cmp_b_512(<64 x i8
;
; AVX512F-32-LABEL: test_mask_cmp_b_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: pushl %ebp
; AVX512F-32-NEXT: .Lcfi5:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: pushl %ebx
; AVX512F-32-NEXT: .Lcfi6:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT: subl $60, %esp
+; AVX512F-32-NEXT: pushl %edi
; AVX512F-32-NEXT: .Lcfi7:
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: pushl %esi
; AVX512F-32-NEXT: .Lcfi8:
-; AVX512F-32-NEXT: .cfi_offset %esi, -12
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 20
+; AVX512F-32-NEXT: subl $60, %esp
; AVX512F-32-NEXT: .Lcfi9:
-; AVX512F-32-NEXT: .cfi_offset %ebx, -8
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: .Lcfi10:
+; AVX512F-32-NEXT: .cfi_offset %esi, -20
+; AVX512F-32-NEXT: .Lcfi11:
+; AVX512F-32-NEXT: .cfi_offset %edi, -16
+; AVX512F-32-NEXT: .Lcfi12:
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: .Lcfi13:
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrb $5, %al
@@ -1862,9 +1872,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT: bextrl %edi, %ecx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
@@ -1874,6 +1883,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ecx, %ebp
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -1884,9 +1894,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT: bextrl %edx, %ebp, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -1894,10 +1903,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT: movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT: bextrl %esi, %ebp, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -1906,9 +1914,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: shrl $16, %ecx
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -1916,10 +1923,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1928,9 +1935,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %ebx
+; AVX512F-32-NEXT: movl %ecx, %ebx
; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $2, %bl
; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -1941,8 +1948,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: shrb $3, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1951,9 +1958,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $4, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1962,10 +1969,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $5, %al
+; AVX512F-32-NEXT: andb $1, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1974,9 +1981,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $6, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1985,9 +1992,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1996,9 +2003,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %ebp, %ebx
+; AVX512F-32-NEXT: shrl $24, %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
@@ -2006,10 +2013,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4
@@ -2018,11 +2025,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT: andb $15, %bl
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrb $2, %bl
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5
@@ -2041,6 +2048,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT: movl %ebp, %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2053,7 +2061,6 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2065,7 +2072,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2076,7 +2083,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2087,8 +2094,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
@@ -2096,7 +2103,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2108,11 +2115,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -2131,7 +2138,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $4, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2142,7 +2149,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2154,7 +2161,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $6, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2165,7 +2172,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2176,7 +2183,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
@@ -2197,11 +2204,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -2220,9 +2227,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: bextrl %edi, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
@@ -2232,7 +2237,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2244,9 +2249,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: bextrl %edx, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -2255,10 +2258,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT: bextrl %esi, %ebx, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -2268,9 +2269,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -2279,10 +2280,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2291,11 +2292,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -2317,9 +2318,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2329,10 +2330,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2342,9 +2343,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2354,9 +2355,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2366,7 +2367,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $24, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2377,10 +2378,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2389,11 +2390,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -2412,11 +2413,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
@@ -2437,7 +2438,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
@@ -2450,7 +2451,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
@@ -2485,11 +2486,13 @@ define i64 @test_mask_cmp_b_512(<64 x i8
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcxl %ecx, %edx
+; AVX512F-32-NEXT: addl %ebp, %eax
+; AVX512F-32-NEXT: adcxl %ebx, %edx
; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
@@ -2539,7 +2542,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a
; AVX512F-32-LABEL: test_ucmp_b_512:
; AVX512F-32: # BB#0:
; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .Lcfi10:
+; AVX512F-32-NEXT: .Lcfi14:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 64
; AVX512F-32-NEXT: vpcmpeqb %zmm1, %zmm0, %k0
; AVX512F-32-NEXT: kmovq %k0, {{[0-9]+}}(%esp)
@@ -2618,19 +2621,29 @@ define i64 @test_mask_x86_avx512_ucmp_b_
;
; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
; AVX512F-32: # BB#0:
-; AVX512F-32-NEXT: pushl %ebx
-; AVX512F-32-NEXT: .Lcfi11:
+; AVX512F-32-NEXT: pushl %ebp
+; AVX512F-32-NEXT: .Lcfi15:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT: pushl %esi
-; AVX512F-32-NEXT: .Lcfi12:
+; AVX512F-32-NEXT: pushl %ebx
+; AVX512F-32-NEXT: .Lcfi16:
; AVX512F-32-NEXT: .cfi_def_cfa_offset 12
+; AVX512F-32-NEXT: pushl %edi
+; AVX512F-32-NEXT: .Lcfi17:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT: pushl %esi
+; AVX512F-32-NEXT: .Lcfi18:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 20
; AVX512F-32-NEXT: subl $60, %esp
-; AVX512F-32-NEXT: .Lcfi13:
-; AVX512F-32-NEXT: .cfi_def_cfa_offset 72
-; AVX512F-32-NEXT: .Lcfi14:
-; AVX512F-32-NEXT: .cfi_offset %esi, -12
-; AVX512F-32-NEXT: .Lcfi15:
-; AVX512F-32-NEXT: .cfi_offset %ebx, -8
+; AVX512F-32-NEXT: .Lcfi19:
+; AVX512F-32-NEXT: .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT: .Lcfi20:
+; AVX512F-32-NEXT: .cfi_offset %esi, -20
+; AVX512F-32-NEXT: .Lcfi21:
+; AVX512F-32-NEXT: .cfi_offset %edi, -16
+; AVX512F-32-NEXT: .Lcfi22:
+; AVX512F-32-NEXT: .cfi_offset %ebx, -12
+; AVX512F-32-NEXT: .Lcfi23:
+; AVX512F-32-NEXT: .cfi_offset %ebp, -8
; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrb $5, %al
@@ -2746,9 +2759,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT: bextrl %edi, %ecx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
@@ -2758,6 +2770,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ecx, %ebp
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2768,9 +2781,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT: bextrl %edx, %ebp, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -2778,10 +2790,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT: movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT: bextrl %esi, %ebp, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -2790,9 +2801,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $16, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: shrl $16, %ecx
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -2800,10 +2810,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2812,9 +2822,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %ebx
+; AVX512F-32-NEXT: movl %ecx, %ebx
; AVX512F-32-NEXT: andb $15, %bl
-; AVX512F-32-NEXT: movl %ebx, %edx
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $2, %bl
; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2825,8 +2835,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: shrb $3, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: shrb $3, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslld $24, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2835,9 +2845,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $4, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $4, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2846,10 +2856,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $5, %dl
-; AVX512F-32-NEXT: andb $1, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $5, %al
+; AVX512F-32-NEXT: andb $1, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2858,9 +2868,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $6, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $6, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2869,9 +2879,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT: shrb $7, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT: shrb $7, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2880,9 +2890,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: shrl $24, %eax
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %ebp, %ebx
+; AVX512F-32-NEXT: shrl $24, %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm4
@@ -2890,10 +2900,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: andb $2, %al
+; AVX512F-32-NEXT: shrb %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm3
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm4
@@ -2902,11 +2912,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT: andb $15, %bl
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrb $2, %bl
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
; AVX512F-32-NEXT: vpbroadcastw %xmm4, %xmm4
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm5
@@ -2925,6 +2935,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT: movl %ebp, %ecx
; AVX512F-32-NEXT: movl %ecx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2937,7 +2948,6 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm5, %k0
; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: movl %ecx, %esi
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2949,7 +2959,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2960,7 +2970,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %esi, %eax
+; AVX512F-32-NEXT: movl %ebp, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -2971,8 +2981,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
; AVX512F-32-NEXT: vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT: kmovd %ecx, %k1
+; AVX512F-32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT: kmovd %ebx, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm3
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm4
@@ -2980,7 +2990,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: andb $2, %al
; AVX512F-32-NEXT: shrb %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -2992,11 +3002,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %ebx, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -3015,7 +3025,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $4, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -3026,7 +3036,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $5, %al
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -3038,7 +3048,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $6, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -3049,7 +3059,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrb $7, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -3060,7 +3070,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %al
+; AVX512F-32-NEXT: movb %bh, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastq %xmm2, %xmm2
@@ -3081,11 +3091,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movb %ch, %dl
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movb %bh, %cl
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -3104,9 +3114,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT: shrl $12, %eax
+; AVX512F-32-NEXT: bextrl %edi, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
@@ -3116,7 +3124,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $13, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k1
@@ -3128,9 +3136,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT: shrl $14, %eax
+; AVX512F-32-NEXT: bextrl %edx, %ebx, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -3139,10 +3145,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT: bextrl %esi, %ebx, %eax
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
-; AVX512F-32-NEXT: andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT: shrl $15, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -3152,9 +3156,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %ebx
-; AVX512F-32-NEXT: shrl $16, %ebx
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: movl %ebx, %eax
+; AVX512F-32-NEXT: shrl $16, %eax
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm3
@@ -3163,10 +3167,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllw $8, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3175,11 +3179,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: andb $15, %al
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: shrb $2, %al
-; AVX512F-32-NEXT: kmovd %eax, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %edx
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -3201,9 +3205,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $4, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $4, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastd %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3213,10 +3217,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $5, %al
-; AVX512F-32-NEXT: andb $1, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $5, %cl
+; AVX512F-32-NEXT: andb $1, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $40, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3226,9 +3230,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ebx, %eax
-; AVX512F-32-NEXT: shrb $6, %al
-; AVX512F-32-NEXT: kmovd %eax, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: shrb $6, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3238,9 +3242,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT: shrb $7, %bl
-; AVX512F-32-NEXT: kmovd %ebx, %k1
+; AVX512F-32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT: shrb $7, %al
+; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpsllq $56, %xmm2, %xmm2
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3250,7 +3254,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $24, %eax
; AVX512F-32-NEXT: kmovd %eax, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
@@ -3261,10 +3265,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $2, %dl
-; AVX512F-32-NEXT: shrb %dl
-; AVX512F-32-NEXT: kmovd %edx, %k1
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $2, %cl
+; AVX512F-32-NEXT: shrb %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k1
; AVX512F-32-NEXT: vpmovm2b %k1, %zmm2
; AVX512F-32-NEXT: vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
; AVX512F-32-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3273,11 +3277,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT: movl %eax, %edx
-; AVX512F-32-NEXT: andb $15, %dl
-; AVX512F-32-NEXT: movl %edx, %eax
-; AVX512F-32-NEXT: shrb $2, %dl
-; AVX512F-32-NEXT: kmovd %edx, %k0
+; AVX512F-32-NEXT: movl %eax, %ecx
+; AVX512F-32-NEXT: andb $15, %cl
+; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: shrb $2, %cl
+; AVX512F-32-NEXT: kmovd %ecx, %k0
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
; AVX512F-32-NEXT: vpmovm2b %k0, %zmm2
; AVX512F-32-NEXT: vpbroadcastw %xmm2, %xmm2
@@ -3296,11 +3300,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm3
; AVX512F-32-NEXT: vextracti64x4 $1, %zmm2, %ymm4
; AVX512F-32-NEXT: vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $29, %eax
; AVX512F-32-NEXT: andb $1, %al
; AVX512F-32-NEXT: kmovd %eax, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $28, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k1
@@ -3321,7 +3325,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm4
; AVX512F-32-NEXT: vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
; AVX512F-32-NEXT: vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $30, %eax
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
@@ -3334,7 +3338,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
; AVX512F-32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
; AVX512F-32-NEXT: vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT: movl %ecx, %eax
+; AVX512F-32-NEXT: movl %ebx, %eax
; AVX512F-32-NEXT: shrl $31, %eax
; AVX512F-32-NEXT: kshiftlq $1, %k0, %k0
; AVX512F-32-NEXT: kshiftrq $1, %k0, %k0
@@ -3369,11 +3373,13 @@ define i64 @test_mask_x86_avx512_ucmp_b_
; AVX512F-32-NEXT: kmovq %k1, {{[0-9]+}}(%esp)
; AVX512F-32-NEXT: addl {{[0-9]+}}(%esp), %eax
; AVX512F-32-NEXT: adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT: addl %esi, %eax
-; AVX512F-32-NEXT: adcxl %ecx, %edx
+; AVX512F-32-NEXT: addl %ebp, %eax
+; AVX512F-32-NEXT: adcxl %ebx, %edx
; AVX512F-32-NEXT: addl $60, %esp
; AVX512F-32-NEXT: popl %esi
+; AVX512F-32-NEXT: popl %edi
; AVX512F-32-NEXT: popl %ebx
+; AVX512F-32-NEXT: popl %ebp
; AVX512F-32-NEXT: retl
%res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
%res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
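For readers puzzled by the new 'bextrl' instructions in the AVX512F-32 output
above: BEXTR (BMI1) folds a mask-then-shift pair into a single bit-field
extract, with the start bit and field length packed into a control operand.
A minimal reference model of that semantics, as an illustrative sketch only
(the actual control values in %edx/%esi are materialized elsewhere in the
test and are not shown here):

  #include <cassert>
  #include <cstdint>

  // Reference model of BMI1 BEXTR as used by 'bextrl %edx, %ebx, %eax':
  // the control operand packs the start bit in bits [7:0] and the field
  // length in bits [15:8]; the result is the zero-extended bit field.
  static uint32_t bextr32(uint32_t src, uint32_t control) {
    uint32_t start = control & 0xFF;
    uint32_t len = (control >> 8) & 0xFF;
    if (start >= 32)
      return 0;
    uint32_t shifted = src >> start;
    if (len >= 32)
      return shifted;
    return shifted & ((1u << len) - 1);
  }

  int main() {
    // start = 14, len = 2 reproduces the replaced
    // 'andl $49152, %eax; shrl $14, %eax' sequence.
    assert(bextr32(0xC000u, (2u << 8) | 14) == 3u);
    assert(bextr32(0x4000u, (2u << 8) | 14) == 1u);
    return 0;
  }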
Modified: llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/divide-by-constant.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/divide-by-constant.ll (original)
+++ llvm/trunk/test/CodeGen/X86/divide-by-constant.ll Fri Sep 22 12:37:21 2017
@@ -48,16 +48,16 @@ define zeroext i8 @test3(i8 zeroext %x,
; X32: # BB#0: # %entry
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: imull $171, %eax, %eax
-; X32-NEXT: andl $65024, %eax # imm = 0xFE00
; X32-NEXT: shrl $9, %eax
+; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
; X64-LABEL: test3:
; X64: # BB#0: # %entry
; X64-NEXT: imull $171, %esi, %eax
-; X64-NEXT: andl $65024, %eax # imm = 0xFE00
; X64-NEXT: shrl $9, %eax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
entry:
@@ -167,8 +167,8 @@ define i8 @test8(i8 %x) nounwind {
; X32-NEXT: shrb %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: imull $211, %eax, %eax
-; X32-NEXT: andl $24576, %eax # imm = 0x6000
; X32-NEXT: shrl $13, %eax
+; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
@@ -177,8 +177,8 @@ define i8 @test8(i8 %x) nounwind {
; X64-NEXT: shrb %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: imull $211, %eax, %eax
-; X64-NEXT: andl $24576, %eax # imm = 0x6000
; X64-NEXT: shrl $13, %eax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
%div = udiv i8 %x, 78
@@ -192,8 +192,8 @@ define i8 @test9(i8 %x) nounwind {
; X32-NEXT: shrb $2, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: imull $71, %eax, %eax
-; X32-NEXT: andl $6144, %eax # imm = 0x1800
; X32-NEXT: shrl $11, %eax
+; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X32-NEXT: retl
;
@@ -202,8 +202,8 @@ define i8 @test9(i8 %x) nounwind {
; X64-NEXT: shrb $2, %dil
; X64-NEXT: movzbl %dil, %eax
; X64-NEXT: imull $71, %eax, %eax
-; X64-NEXT: andl $6144, %eax # imm = 0x1800
; X64-NEXT: shrl $11, %eax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; X64-NEXT: retq
%div = udiv i8 %x, 116
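The divide-by-constant diffs all follow from one identity: for logical
shifts, (x & C1) >> C2 == (x >> C2) & (C1 >> C2), because both sides select
exactly the same bits of x. Shifting first shrinks the mask constant by the
shift amount, and here the shrunken mask is further absorbed into the cheap
'movzwl' zero-extend. A quick self-check of the identity with the constants
from test3 (a sketch, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // With C1 = 0xFE00 and C2 = 9 (test3), the shifted mask is 0x7F,
    // which fits in a byte; the identity itself holds for any x.
    const uint32_t C1 = 0xFE00u, C2 = 9;
    for (uint32_t x = 0; x < (1u << 17); ++x)
      assert(((x & C1) >> C2) == ((x >> C2) & (C1 >> C2)));
    return 0;
  }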
Modified: llvm/trunk/test/CodeGen/X86/known-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits.ll Fri Sep 22 12:37:21 2017
@@ -12,8 +12,8 @@ define void @knownbits_zext_in_reg(i8*)
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: movzbl (%eax), %eax
; X32-NEXT: imull $101, %eax, %eax
-; X32-NEXT: andl $16384, %eax # imm = 0x4000
; X32-NEXT: shrl $14, %eax
+; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -50,8 +50,8 @@ define void @knownbits_zext_in_reg(i8*)
; X64: # BB#0: # %BB
; X64-NEXT: movzbl (%rdi), %eax
; X64-NEXT: imull $101, %eax, %eax
-; X64-NEXT: andl $16384, %eax # imm = 0x4000
; X64-NEXT: shrl $14, %eax
+; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vmovd %eax, %xmm0
; X64-NEXT: vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
Modified: llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll (original)
+++ llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll Fri Sep 22 12:37:21 2017
@@ -12,10 +12,9 @@ define void @foo(i32 %a) {
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: shrl $23, %eax
-; CHECK-NEXT: testb $1, %ah
-; CHECK-NEXT: jne .LBB0_2
+; CHECK-NEXT: shrl $23, %edi
+; CHECK-NEXT: btl $8, %edi
+; CHECK-NEXT: jb .LBB0_2
; CHECK-NEXT: # BB#1: # %true
; CHECK-NEXT: callq qux
; CHECK-NEXT: .LBB0_2: # %false
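The live-out-reg-info.ll change shows a knock-on win: once the value is
shifted first, the remaining single-bit test matches 'bt', which copies the
selected bit into the carry flag (hence 'jb'). A rough model of what the new
'shrl $23; btl $8; jb' sequence computes (a sketch; only the branch condition
from the test above is modeled):

  #include <cassert>
  #include <cstdint>

  // bt copies the selected bit into CF; bit 8 of (a >> 23) is bit 31
  // of the original argument, so jb branches exactly when bit 31 is set.
  static bool carry_set(uint32_t a) {
    return ((a >> 23) >> 8) & 1;
  }

  int main() {
    assert(carry_set(0x80000000u));   // bit 31 set -> jb taken
    assert(!carry_set(0x7FFFFFFFu));  // bit 31 clear -> falls through
    return 0;
  }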
Modified: llvm/trunk/test/CodeGen/X86/test-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/test-shrink.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/test-shrink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/test-shrink.ll Fri Sep 22 12:37:21 2017
@@ -3,10 +3,10 @@
; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=CHECK-32
; CHECK-64-LABEL: g64xh:
-; CHECK-64: testb $8, {{%ah|%ch}}
+; CHECK-64: btl $11
; CHECK-64: ret
; CHECK-32-LABEL: g64xh:
-; CHECK-32: testb $8, %ah
+; CHECK-32: btl $11
; CHECK-32: ret
define void @g64xh(i64 inreg %x) nounwind {
%t = and i64 %x, 2048
@@ -37,10 +37,10 @@ no:
ret void
}
; CHECK-64-LABEL: g32xh:
-; CHECK-64: testb $8, {{%ah|%ch}}
+; CHECK-64: btl $11
; CHECK-64: ret
; CHECK-32-LABEL: g32xh:
-; CHECK-32: testb $8, %ah
+; CHECK-32: btl $11
; CHECK-32: ret
define void @g32xh(i32 inreg %x) nounwind {
%t = and i32 %x, 2048
@@ -71,10 +71,10 @@ no:
ret void
}
; CHECK-64-LABEL: g16xh:
-; CHECK-64: testb $8, {{%ah|%ch}}
+; CHECK-64: btl $11
; CHECK-64: ret
; CHECK-32-LABEL: g16xh:
-; CHECK-32: testb $8, %ah
+; CHECK-32: btl $11
; CHECK-32: ret
define void @g16xh(i16 inreg %x) nounwind {
%t = and i16 %x, 2048
Modified: llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll Fri Sep 22 12:37:21 2017
@@ -1,13 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s
+; computeKnownBits determines that we don't need a mask op that is required in the general case.
+
define i8 @foo(i8 %tmp325) {
; CHECK-LABEL: foo:
; CHECK: # BB#0:
; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: imull $111, %ecx, %eax
-; CHECK-NEXT: andl $28672, %eax # imm = 0x7000
; CHECK-NEXT: shrl $12, %eax
+; CHECK-NEXT: movzwl %ax, %eax
; CHECK-NEXT: movb $37, %dl
; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT: mulb %dl
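The comment added to urem-i8-constant.ll is worth spelling out: the widened
product is 111 * zext(i8), bounded by 111 * 255 = 28305, so after 'shrl $12'
at most the low three bits can be set, and the shifted-down mask
(0x7000 >> 12 = 7) can never clear a live bit. An exhaustive check of that
bound (a sketch, not part of the patch):

  #include <cassert>
  #include <cstdint>

  int main() {
    // The mask is provably redundant for every i8 input: the shifted
    // product never exceeds 6, which already fits inside the mask 7.
    for (uint32_t x = 0; x <= 255; ++x) {
      uint32_t p = 111 * x;
      assert(((p & 0x7000u) >> 12) == (p >> 12));
      assert((p >> 12) <= 6);
    }
    return 0;
  }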
Modified: llvm/trunk/test/CodeGen/X86/xor-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xor-icmp.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xor-icmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xor-icmp.ll Fri Sep 22 12:37:21 2017
@@ -17,15 +17,13 @@ define i32 @t(i32 %a, i32 %b) nounwind s
;
; X64-LABEL: t:
; X64: # BB#0: # %entry
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: xorl %esi, %eax
-; X64-NEXT: testb $64, %ah
-; X64-NEXT: je .LBB0_1
-; X64-NEXT: # BB#2: # %bb1
+; X64-NEXT: xorl %esi, %edi
; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: btl $14, %edi
+; X64-NEXT: jae .LBB0_1
+; X64-NEXT: # BB#2: # %bb1
; X64-NEXT: jmp bar # TAILCALL
; X64-NEXT: .LBB0_1: # %bb
-; X64-NEXT: xorl %eax, %eax
; X64-NEXT: jmp foo # TAILCALL
entry:
%0 = and i32 %a, 16384