[llvm] r314023 - [x86] swap order of srl (and X, C1), C2 when it saves size

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Fri Sep 22 12:37:22 PDT 2017


Author: spatel
Date: Fri Sep 22 12:37:21 2017
New Revision: 314023

URL: http://llvm.org/viewvc/llvm-project?rev=314023&view=rev
Log:
[x86] swap order of srl (and X, C1), C2 when it saves size

The (non-)obvious win comes from saving 3 bytes: when the shifted mask fits in a sign-extended 8-bit
immediate, the 'and' can use the 0x83 opcode variant (imm8) instead of 0x81 (imm32).
There are also larger improvements based on known-bits that allow us to eliminate the mask entirely.

As noted in the code comments, this could be extended. There are potentially other wins from always
shifting first, but doing that exposes a tangle of problems in other pattern matching. We already do
this transform generically in instcombine, but icmp IR often doesn't match that pattern, so we must
also account for it in the backend.
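
For illustration only -- not part of the commit -- here is a standalone C++ sketch of the identity
the combine relies on, plus a simplified model of its size heuristic. The helper names
(maskThenShift, shiftThenMask, profitable) are hypothetical, and the 0xF000/12 constants mirror the
test updates below:

  #include <cassert>
  #include <cstdint>

  // For a logical shift right, (X & C1) >> C2 == (X >> C2) & (C1 >> C2),
  // because 'and' is bitwise and lshr only moves or discards bits.
  static uint32_t maskThenShift(uint32_t X, uint32_t C1, unsigned C2) {
    return (X & C1) >> C2;
  }
  static uint32_t shiftThenMask(uint32_t X, uint32_t C1, unsigned C2) {
    return (X >> C2) & (C1 >> C2);
  }

  // Simplified model of the patch's profitability check: rewrite only when
  // the original mask needs more than a sign-extended imm8 but the shifted
  // mask fits in one (so 'and' can use the 0x83 encoding instead of 0x81).
  static bool profitable(int64_t MaskVal, unsigned ShiftAmt) {
    auto fitsInImm8 = [](int64_t V) { return V >= -128 && V <= 127; };
    return !fitsInImm8(MaskVal) && fitsInImm8(MaskVal >> ShiftAmt);
  }

  int main() {
    const uint32_t Samples[] = {0u, 0x1234u, 0xF000u, 0xFFFFFFFFu};
    for (uint32_t X : Samples)
      assert(maskThenShift(X, 0xF000, 12) == shiftThenMask(X, 0xF000, 12));
    assert(profitable(0xF000, 12)); // 0xF000 needs imm32; 0xF fits in imm8.
    return 0;
  }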

Differential Revision: https://reviews.llvm.org/D38181

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
    llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
    llvm/trunk/test/CodeGen/X86/known-bits.ll
    llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll
    llvm/trunk/test/CodeGen/X86/test-shrink.ll
    llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll
    llvm/trunk/test/CodeGen/X86/xor-icmp.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Sep 22 12:37:21 2017
@@ -31762,6 +31762,40 @@ static SDValue combineShiftRightAlgebrai
   return SDValue();
 }
 
+static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG) {
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT VT = N0.getValueType();
+
+  // Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
+  // TODO: This is a generic DAG combine that became an x86-only combine to
+  // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and
+  // and-not ('andn').
+  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+    return SDValue();
+
+  auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
+  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+  if (!ShiftC || !AndC)
+    return SDValue();
+
+  // If the 'and' mask is already smaller than a byte, then don't bother.
+  // If the new 'and' mask would be bigger than a byte, then don't bother.
+  // If the mask fits in a byte, then we know we can generate smaller and
+  // potentially better code by shifting first.
+  // TODO: Always try to shrink a mask that is over 32-bits?
+  APInt MaskVal = AndC->getAPIntValue();
+  APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
+  if (MaskVal.getMinSignedBits() <= 8 || NewMaskVal.getMinSignedBits() > 8)
+    return SDValue();
+
+  // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
+  SDLoc DL(N);
+  SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
+  SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
+  return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
+}
+
 /// \brief Returns a vector of 0s if the node in input is a vector logical
 /// shift by a constant amount which is known to be bigger than or equal
 /// to the vector element size in bits.
@@ -31804,6 +31838,10 @@ static SDValue combineShift(SDNode* N, S
     if (SDValue V = combineShiftRightAlgebraic(N, DAG))
       return V;
 
+  if (N->getOpcode() == ISD::SRL)
+    if (SDValue V = combineShiftRightLogical(N, DAG))
+      return V;
+
   // Try to fold this logical shift into a zero vector.
   if (N->getOpcode() != ISD::SRA)
     if (SDValue V = performShiftToAllZeros(N, DAG, Subtarget))

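To make the size win concrete, here is the before/after pattern from the test updates below,
annotated with standard x86 encoding sizes (AND r/m32 with imm32 is opcode 0x81, 6 bytes total;
with a sign-extended imm8 it is opcode 0x83, 3 bytes; the shift is 3 bytes either way):

  Before:                                  After:
    andl $61440, %ecx # imm = 0xF000         shrl $12, %ecx
    shrl $12, %ecx                           andl $15, %ecx
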
Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-fast-isel.ll Fri Sep 22 12:37:21 2017
@@ -129,8 +129,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm1, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $61440, %ecx # imm = 0xF000
 ; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    andl $15, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm1
 ; X32-NEXT:    vpbroadcastd %xmm1, %xmm1
@@ -151,8 +151,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm1, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $49152, %ecx # imm = 0xC000
 ; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    andl $3, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm1
 ; X32-NEXT:    vpbroadcastw %xmm1, %xmm1
@@ -162,8 +162,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm1 = zmm1[0,1,2,3],zmm2[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm1, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $32768, %ecx # imm = 0x8000
 ; X32-NEXT:    shrl $15, %ecx
+; X32-NEXT:    andl $1, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm1
 ; X32-NEXT:    vpslldq {{.*#+}} xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm1[0]
@@ -483,8 +483,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $61440, %ecx # imm = 0xF000
 ; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    andl $15, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastd %xmm0, %xmm0
@@ -507,8 +507,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $49152, %ecx # imm = 0xC000
 ; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    andl $3, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
@@ -519,8 +519,8 @@ define <8 x i64> @test_mm512_mask_set1_e
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $32768, %ecx # imm = 0x8000
 ; X32-NEXT:    shrl $15, %ecx
+; X32-NEXT:    andl $1, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
@@ -860,8 +860,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $61440, %ecx # imm = 0xF000
 ; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    andl $15, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastd %xmm0, %xmm0
@@ -882,8 +882,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $49152, %ecx # imm = 0xC000
 ; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    andl $3, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
@@ -893,8 +893,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[4,5,6,7]
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $32768, %ecx # imm = 0x8000
 ; X32-NEXT:    shrl $15, %ecx
+; X32-NEXT:    andl $1, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]
@@ -1214,8 +1214,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $61440, %ecx # imm = 0xF000
 ; X32-NEXT:    shrl $12, %ecx
+; X32-NEXT:    andl $15, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastd %xmm0, %xmm0
@@ -1238,8 +1238,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $49152, %ecx # imm = 0xC000
 ; X32-NEXT:    shrl $14, %ecx
+; X32-NEXT:    andl $3, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpbroadcastw %xmm0, %xmm0
@@ -1250,8 +1250,8 @@ define <8 x i64> @test_mm512_maskz_set1_
 ; X32-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm0
 ; X32-NEXT:    vpmovb2m %zmm0, %k0
 ; X32-NEXT:    movl %eax, %ecx
-; X32-NEXT:    andl $32768, %ecx # imm = 0x8000
 ; X32-NEXT:    shrl $15, %ecx
+; X32-NEXT:    andl $1, %ecx
 ; X32-NEXT:    kmovd %ecx, %k1
 ; X32-NEXT:    vpmovm2b %k1, %zmm0
 ; X32-NEXT:    vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm0[0]

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll Fri Sep 22 12:37:21 2017
@@ -1734,19 +1734,29 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ;
 ; AVX512F-32-LABEL: test_mask_cmp_b_512:
 ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    pushl %ebx
+; AVX512F-32-NEXT:    pushl %ebp
 ; AVX512F-32-NEXT:  .Lcfi5:
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:    pushl %ebx
 ; AVX512F-32-NEXT:  .Lcfi6:
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
-; AVX512F-32-NEXT:    subl $60, %esp
+; AVX512F-32-NEXT:    pushl %edi
 ; AVX512F-32-NEXT:  .Lcfi7:
-; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT:    pushl %esi
 ; AVX512F-32-NEXT:  .Lcfi8:
-; AVX512F-32-NEXT:    .cfi_offset %esi, -12
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 20
+; AVX512F-32-NEXT:    subl $60, %esp
 ; AVX512F-32-NEXT:  .Lcfi9:
-; AVX512F-32-NEXT:    .cfi_offset %ebx, -8
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT:  .Lcfi10:
+; AVX512F-32-NEXT:    .cfi_offset %esi, -20
+; AVX512F-32-NEXT:  .Lcfi11:
+; AVX512F-32-NEXT:    .cfi_offset %edi, -16
+; AVX512F-32-NEXT:  .Lcfi12:
+; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
+; AVX512F-32-NEXT:  .Lcfi13:
+; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT:    movl %ecx, %eax
 ; AVX512F-32-NEXT:    shrb $5, %al
@@ -1862,9 +1872,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT:    shrl $12, %eax
+; AVX512F-32-NEXT:    movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT:    bextrl %edi, %ecx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
@@ -1874,6 +1883,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
 ; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ecx, %ebp
 ; AVX512F-32-NEXT:    shrl $13, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -1884,9 +1894,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT:    shrl $14, %eax
+; AVX512F-32-NEXT:    movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT:    bextrl %edx, %ebp, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -1894,10 +1903,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT:    movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT:    bextrl %esi, %ebp, %eax
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT:    shrl $15, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -1906,9 +1914,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrl $16, %eax
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    shrl $16, %ecx
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -1916,10 +1923,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    andb $2, %al
+; AVX512F-32-NEXT:    shrb %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllw $8, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1928,9 +1935,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %ebx
+; AVX512F-32-NEXT:    movl %ecx, %ebx
 ; AVX512F-32-NEXT:    andb $15, %bl
-; AVX512F-32-NEXT:    movl %ebx, %edx
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $2, %bl
 ; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -1941,8 +1948,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    shrb $3, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    shrb $3, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslld $24, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1951,9 +1958,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $4, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $4, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1962,10 +1969,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $5, %dl
-; AVX512F-32-NEXT:    andb $1, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $5, %al
+; AVX512F-32-NEXT:    andb $1, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $40, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1974,9 +1981,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $6, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1985,9 +1992,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT:    shrb $7, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $56, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -1996,9 +2003,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrl $24, %eax
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %ebp, %ebx
+; AVX512F-32-NEXT:    shrl $24, %ebx
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastq %xmm2, %ymm3
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm4
@@ -2006,10 +2013,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    andb $2, %al
+; AVX512F-32-NEXT:    shrb %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm3
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm4
@@ -2018,11 +2025,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT:    andb $15, %bl
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    shrb $2, %bl
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm4
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm4, %xmm4
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm5
@@ -2041,6 +2048,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT:    movl %ebp, %ecx
 ; AVX512F-32-NEXT:    movl %ecx, %eax
 ; AVX512F-32-NEXT:    shrl $28, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2053,7 +2061,6 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm5, %k0
 ; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    movl %ecx, %esi
 ; AVX512F-32-NEXT:    shrl $29, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2065,7 +2072,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %esi, %eax
+; AVX512F-32-NEXT:    movl %ebp, %eax
 ; AVX512F-32-NEXT:    shrl $30, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2076,7 +2083,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %esi, %eax
+; AVX512F-32-NEXT:    movl %ebp, %eax
 ; AVX512F-32-NEXT:    shrl $31, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2087,8 +2094,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm4
@@ -2096,7 +2103,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    andb $2, %al
 ; AVX512F-32-NEXT:    shrb %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2108,11 +2115,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -2131,7 +2138,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $4, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2142,7 +2149,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $5, %al
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2154,7 +2161,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $6, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2165,7 +2172,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $7, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2176,7 +2183,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movb %ch, %al
+; AVX512F-32-NEXT:    movb %bh, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastq %xmm2, %xmm2
@@ -2197,11 +2204,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movb %ch, %dl
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movb %bh, %cl
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -2220,9 +2227,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT:    shrl $12, %eax
+; AVX512F-32-NEXT:    bextrl %edi, %ebx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
@@ -2232,7 +2237,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $13, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2244,9 +2249,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT:    shrl $14, %eax
+; AVX512F-32-NEXT:    bextrl %edx, %ebx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -2255,10 +2258,8 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT:    bextrl %esi, %ebx, %eax
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT:    shrl $15, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -2268,9 +2269,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %ebx
-; AVX512F-32-NEXT:    shrl $16, %ebx
-; AVX512F-32-NEXT:    kmovd %ebx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    shrl $16, %eax
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -2279,10 +2280,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $2, %cl
+; AVX512F-32-NEXT:    shrb %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllw $8, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2291,11 +2292,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k0
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %edx
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -2317,9 +2318,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $4, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $4, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2329,10 +2330,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $5, %al
-; AVX512F-32-NEXT:    andb $1, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $5, %cl
+; AVX512F-32-NEXT:    andb $1, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $40, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2342,9 +2343,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $6, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $6, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2354,9 +2355,9 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT:    shrb $7, %bl
-; AVX512F-32-NEXT:    kmovd %ebx, %k1
+; AVX512F-32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT:    shrb $7, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $56, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2366,7 +2367,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $24, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2377,10 +2378,10 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $2, %cl
+; AVX512F-32-NEXT:    shrb %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2389,11 +2390,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k0
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -2412,11 +2413,11 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm3
 ; AVX512F-32-NEXT:    vextracti64x4 $1, %zmm2, %ymm4
 ; AVX512F-32-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $29, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $28, %eax
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
@@ -2437,7 +2438,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm4
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $30, %eax
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
@@ -2450,7 +2451,7 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $31, %eax
 ; AVX512F-32-NEXT:    kshiftlq $1, %k0, %k0
 ; AVX512F-32-NEXT:    kshiftrq $1, %k0, %k0
@@ -2485,11 +2486,13 @@ define i64 @test_mask_cmp_b_512(<64 x i8
 ; AVX512F-32-NEXT:    kmovq %k1, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT:    addl %esi, %eax
-; AVX512F-32-NEXT:    adcxl %ecx, %edx
+; AVX512F-32-NEXT:    addl %ebp, %eax
+; AVX512F-32-NEXT:    adcxl %ebx, %edx
 ; AVX512F-32-NEXT:    addl $60, %esp
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    popl %ebp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
@@ -2539,7 +2542,7 @@ define i64 @test_ucmp_b_512(<64 x i8> %a
 ; AVX512F-32-LABEL: test_ucmp_b_512:
 ; AVX512F-32:       # BB#0:
 ; AVX512F-32-NEXT:    subl $60, %esp
-; AVX512F-32-NEXT:  .Lcfi10:
+; AVX512F-32-NEXT:  .Lcfi14:
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 64
 ; AVX512F-32-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0
 ; AVX512F-32-NEXT:    kmovq %k0, {{[0-9]+}}(%esp)
@@ -2618,19 +2621,29 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ;
 ; AVX512F-32-LABEL: test_mask_x86_avx512_ucmp_b_512:
 ; AVX512F-32:       # BB#0:
-; AVX512F-32-NEXT:    pushl %ebx
-; AVX512F-32-NEXT:  .Lcfi11:
+; AVX512F-32-NEXT:    pushl %ebp
+; AVX512F-32-NEXT:  .Lcfi15:
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 8
-; AVX512F-32-NEXT:    pushl %esi
-; AVX512F-32-NEXT:  .Lcfi12:
+; AVX512F-32-NEXT:    pushl %ebx
+; AVX512F-32-NEXT:  .Lcfi16:
 ; AVX512F-32-NEXT:    .cfi_def_cfa_offset 12
+; AVX512F-32-NEXT:    pushl %edi
+; AVX512F-32-NEXT:  .Lcfi17:
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 16
+; AVX512F-32-NEXT:    pushl %esi
+; AVX512F-32-NEXT:  .Lcfi18:
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 20
 ; AVX512F-32-NEXT:    subl $60, %esp
-; AVX512F-32-NEXT:  .Lcfi13:
-; AVX512F-32-NEXT:    .cfi_def_cfa_offset 72
-; AVX512F-32-NEXT:  .Lcfi14:
-; AVX512F-32-NEXT:    .cfi_offset %esi, -12
-; AVX512F-32-NEXT:  .Lcfi15:
-; AVX512F-32-NEXT:    .cfi_offset %ebx, -8
+; AVX512F-32-NEXT:  .Lcfi19:
+; AVX512F-32-NEXT:    .cfi_def_cfa_offset 80
+; AVX512F-32-NEXT:  .Lcfi20:
+; AVX512F-32-NEXT:    .cfi_offset %esi, -20
+; AVX512F-32-NEXT:  .Lcfi21:
+; AVX512F-32-NEXT:    .cfi_offset %edi, -16
+; AVX512F-32-NEXT:  .Lcfi22:
+; AVX512F-32-NEXT:    .cfi_offset %ebx, -12
+; AVX512F-32-NEXT:  .Lcfi23:
+; AVX512F-32-NEXT:    .cfi_offset %ebp, -8
 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; AVX512F-32-NEXT:    movl %ecx, %eax
 ; AVX512F-32-NEXT:    shrb $5, %al
@@ -2746,9 +2759,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT:    shrl $12, %eax
+; AVX512F-32-NEXT:    movl $1036, %edi # imm = 0x40C
+; AVX512F-32-NEXT:    bextrl %edi, %ecx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
@@ -2758,6 +2770,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
 ; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ecx, %ebp
 ; AVX512F-32-NEXT:    shrl $13, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2768,9 +2781,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT:    shrl $14, %eax
+; AVX512F-32-NEXT:    movl $526, %edx # imm = 0x20E
+; AVX512F-32-NEXT:    bextrl %edx, %ebp, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -2778,10 +2790,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
+; AVX512F-32-NEXT:    movl $271, %esi # imm = 0x10F
+; AVX512F-32-NEXT:    bextrl %esi, %ebp, %eax
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT:    shrl $15, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -2790,9 +2801,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrl $16, %eax
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    shrl $16, %ecx
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -2800,10 +2810,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    andb $2, %al
+; AVX512F-32-NEXT:    shrb %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllw $8, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2812,9 +2822,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %ebx
+; AVX512F-32-NEXT:    movl %ecx, %ebx
 ; AVX512F-32-NEXT:    andb $15, %bl
-; AVX512F-32-NEXT:    movl %ebx, %edx
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $2, %bl
 ; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2825,8 +2835,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    shrb $3, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    shrb $3, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslld $24, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2835,9 +2845,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $4, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $4, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2846,10 +2856,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $5, %dl
-; AVX512F-32-NEXT:    andb $1, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $5, %al
+; AVX512F-32-NEXT:    andb $1, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $40, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2858,9 +2868,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $6, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $6, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2869,9 +2879,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
-; AVX512F-32-NEXT:    shrb $7, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    # kill: %CL<def> %CL<kill> %ECX<kill> %ECX<def>
+; AVX512F-32-NEXT:    shrb $7, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $56, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -2880,9 +2890,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    shrl $24, %eax
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %ebp, %ebx
+; AVX512F-32-NEXT:    shrl $24, %ebx
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastq %xmm2, %ymm3
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm4
@@ -2890,10 +2900,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm2, %ymm4, %ymm3, %ymm3
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm3 = zmm3[0,1,2,3],zmm4[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm3, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    andb $2, %al
+; AVX512F-32-NEXT:    shrb %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm3
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm3 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6]
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm4
@@ -2902,11 +2912,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm2, %ymm5, %ymm4, %ymm4
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm4 = zmm4[0,1,2,3],zmm5[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm4, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
+; AVX512F-32-NEXT:    andb $15, %bl
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    shrb $2, %bl
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm4
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm4, %xmm4
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm5
@@ -2925,6 +2935,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm6, %ymm7, %ymm5, %ymm5
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm5, %k0
+; AVX512F-32-NEXT:    movl %ebp, %ecx
 ; AVX512F-32-NEXT:    movl %ecx, %eax
 ; AVX512F-32-NEXT:    shrl $28, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2937,7 +2948,6 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm5 = zmm5[0,1,2,3],zmm7[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm5, %k0
 ; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    movl %ecx, %esi
 ; AVX512F-32-NEXT:    shrl $29, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2949,7 +2959,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm3, %ymm2, %ymm7, %ymm7
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm7[0,1,2,3],zmm2[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %esi, %eax
+; AVX512F-32-NEXT:    movl %ebp, %eax
 ; AVX512F-32-NEXT:    shrl $30, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2960,7 +2970,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm7, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %esi, %eax
+; AVX512F-32-NEXT:    movl %ebp, %eax
 ; AVX512F-32-NEXT:    shrl $31, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -2971,8 +2981,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm4, %ymm3, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm2 = zmm2[0,1,2,3],zmm3[4,5,6,7]
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; AVX512F-32-NEXT:    kmovd %ecx, %k1
+; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; AVX512F-32-NEXT:    kmovd %ebx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vextracti64x4 $1, %zmm2, %ymm3
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm4
@@ -2980,7 +2990,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    andb $2, %al
 ; AVX512F-32-NEXT:    shrb %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -2992,11 +3002,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -3015,7 +3025,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $4, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -3026,7 +3036,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $5, %al
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -3038,7 +3048,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $6, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -3049,7 +3059,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrb $7, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -3060,7 +3070,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movb %ch, %al
+; AVX512F-32-NEXT:    movb %bh, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastq %xmm2, %xmm2
@@ -3081,11 +3091,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movb %ch, %dl
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movb %bh, %cl
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -3104,9 +3114,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $61440, %eax # imm = 0xF000
-; AVX512F-32-NEXT:    shrl $12, %eax
+; AVX512F-32-NEXT:    bextrl %edi, %ebx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
@@ -3116,7 +3124,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $13, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
@@ -3128,9 +3136,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $49152, %eax # imm = 0xC000
-; AVX512F-32-NEXT:    shrl $14, %eax
+; AVX512F-32-NEXT:    bextrl %edx, %ebx, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -3139,10 +3145,8 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
+; AVX512F-32-NEXT:    bextrl %esi, %ebx, %eax
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
-; AVX512F-32-NEXT:    andl $32768, %eax # imm = 0x8000
-; AVX512F-32-NEXT:    shrl $15, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0]
@@ -3152,9 +3156,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %ebx
-; AVX512F-32-NEXT:    shrl $16, %ebx
-; AVX512F-32-NEXT:    kmovd %ebx, %k1
+; AVX512F-32-NEXT:    movl %ebx, %eax
+; AVX512F-32-NEXT:    shrl $16, %eax
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm3
@@ -3163,10 +3167,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $2, %cl
+; AVX512F-32-NEXT:    shrb %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllw $8, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3175,11 +3179,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    andb $15, %al
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    shrb $2, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k0
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %edx
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -3201,9 +3205,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $4, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $4, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastd %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3213,10 +3217,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $5, %al
-; AVX512F-32-NEXT:    andb $1, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $5, %cl
+; AVX512F-32-NEXT:    andb $1, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $40, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3226,9 +3230,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ebx, %eax
-; AVX512F-32-NEXT:    shrb $6, %al
-; AVX512F-32-NEXT:    kmovd %eax, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    shrb $6, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3238,9 +3242,9 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    # kill: %BL<def> %BL<kill> %EBX<kill> %EBX<def>
-; AVX512F-32-NEXT:    shrb $7, %bl
-; AVX512F-32-NEXT:    kmovd %ebx, %k1
+; AVX512F-32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill> %EAX<def>
+; AVX512F-32-NEXT:    shrb $7, %al
+; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpsllq $56, %xmm2, %xmm2
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3250,7 +3254,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $24, %eax
 ; AVX512F-32-NEXT:    kmovd %eax, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
@@ -3261,10 +3265,10 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $2, %dl
-; AVX512F-32-NEXT:    shrb %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k1
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $2, %cl
+; AVX512F-32-NEXT:    shrb %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k1, %zmm2
 ; AVX512F-32-NEXT:    vpslldq {{.*#+}} xmm2 = zero,zero,zero,zero,zero,zero,zero,zero,zero,xmm2[0,1,2,3,4,5,6]
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm2
@@ -3273,11 +3277,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255,255,255,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm4, %ymm2, %ymm2
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm2
-; AVX512F-32-NEXT:    movl %eax, %edx
-; AVX512F-32-NEXT:    andb $15, %dl
-; AVX512F-32-NEXT:    movl %edx, %eax
-; AVX512F-32-NEXT:    shrb $2, %dl
-; AVX512F-32-NEXT:    kmovd %edx, %k0
+; AVX512F-32-NEXT:    movl %eax, %ecx
+; AVX512F-32-NEXT:    andb $15, %cl
+; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    shrb $2, %cl
+; AVX512F-32-NEXT:    kmovd %ecx, %k0
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
 ; AVX512F-32-NEXT:    vpmovm2b %k0, %zmm2
 ; AVX512F-32-NEXT:    vpbroadcastw %xmm2, %xmm2
@@ -3296,11 +3300,11 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm3, %ymm0, %ymm3
 ; AVX512F-32-NEXT:    vextracti64x4 $1, %zmm2, %ymm4
 ; AVX512F-32-NEXT:    vpblendvb %ymm6, %ymm4, %ymm3, %ymm3
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $29, %eax
 ; AVX512F-32-NEXT:    andb $1, %al
 ; AVX512F-32-NEXT:    kmovd %eax, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $28, %eax
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k1
@@ -3321,7 +3325,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vinserti128 $1, %xmm4, %ymm0, %ymm4
 ; AVX512F-32-NEXT:    vmovdqa {{.*#+}} ymm5 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,255,255]
 ; AVX512F-32-NEXT:    vpblendvb %ymm5, %ymm3, %ymm4, %ymm3
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $30, %eax
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
@@ -3334,7 +3338,7 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    vpblendvb %ymm7, %ymm3, %ymm4, %ymm3
 ; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm3, %zmm2, %zmm2
 ; AVX512F-32-NEXT:    vpmovb2m %zmm2, %k0
-; AVX512F-32-NEXT:    movl %ecx, %eax
+; AVX512F-32-NEXT:    movl %ebx, %eax
 ; AVX512F-32-NEXT:    shrl $31, %eax
 ; AVX512F-32-NEXT:    kshiftlq $1, %k0, %k0
 ; AVX512F-32-NEXT:    kshiftrq $1, %k0, %k0
@@ -3369,11 +3373,13 @@ define i64 @test_mask_x86_avx512_ucmp_b_
 ; AVX512F-32-NEXT:    kmovq %k1, {{[0-9]+}}(%esp)
 ; AVX512F-32-NEXT:    addl {{[0-9]+}}(%esp), %eax
 ; AVX512F-32-NEXT:    adcxl {{[0-9]+}}(%esp), %edx
-; AVX512F-32-NEXT:    addl %esi, %eax
-; AVX512F-32-NEXT:    adcxl %ecx, %edx
+; AVX512F-32-NEXT:    addl %ebp, %eax
+; AVX512F-32-NEXT:    adcxl %ebx, %edx
 ; AVX512F-32-NEXT:    addl $60, %esp
 ; AVX512F-32-NEXT:    popl %esi
+; AVX512F-32-NEXT:    popl %edi
 ; AVX512F-32-NEXT:    popl %ebx
+; AVX512F-32-NEXT:    popl %ebp
 ; AVX512F-32-NEXT:    retl
   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
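
A note on the bextrl lines in the hunks above: once the mask is applied after
the shift, a shift-plus-mask pair is a plain bit-field extract, and BMI's
BEXTR does that in one instruction with a register control operand (the
control registers %edi/%edx/%esi are presumably loaded with the matching
constants outside the hunks shown). A rough software model, assuming the
documented control layout of start index in bits 7:0 and field length in
bits 15:8 -- an illustration, not the in-tree matcher:

  #include <cassert>
  #include <cstdint>

  // Software model of 32-bit BEXTR: dst = (src >> start) & ((1 << len) - 1),
  // with start taken from ctrl[7:0] and len from ctrl[15:8].
  static uint32_t bextr32(uint32_t src, uint32_t ctrl) {
    uint32_t start = ctrl & 0xFFu;
    uint32_t len = (ctrl >> 8) & 0xFFu;
    if (start >= 32)
      return 0;
    uint64_t shifted = (uint64_t)src >> start;
    uint64_t mask = (len >= 32) ? ~0ull : ((1ull << len) - 1);
    return (uint32_t)(shifted & mask);
  }

  int main() {
    // andl $61440 (0xF000) + shrl $12 extracts the 4-bit field at bit 12,
    // i.e. a control value of 0x040C (len 4, start 12).
    for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 0x10001ull) {
      uint32_t v = (uint32_t)x;
      assert(bextr32(v, 0x040C) == ((v & 0xF000u) >> 12));
    }
    return 0;
  }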

Modified: llvm/trunk/test/CodeGen/X86/divide-by-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/divide-by-constant.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/divide-by-constant.ll (original)
+++ llvm/trunk/test/CodeGen/X86/divide-by-constant.ll Fri Sep 22 12:37:21 2017
@@ -48,16 +48,16 @@ define zeroext i8 @test3(i8 zeroext %x,
 ; X32:       # BB#0: # %entry
 ; X32-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    imull $171, %eax, %eax
-; X32-NEXT:    andl $65024, %eax # imm = 0xFE00
 ; X32-NEXT:    shrl $9, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test3:
 ; X64:       # BB#0: # %entry
 ; X64-NEXT:    imull $171, %esi, %eax
-; X64-NEXT:    andl $65024, %eax # imm = 0xFE00
 ; X64-NEXT:    shrl $9, %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
 entry:
@@ -167,8 +167,8 @@ define i8 @test8(i8 %x) nounwind {
 ; X32-NEXT:    shrb %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    imull $211, %eax, %eax
-; X32-NEXT:    andl $24576, %eax # imm = 0x6000
 ; X32-NEXT:    shrl $13, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -177,8 +177,8 @@ define i8 @test8(i8 %x) nounwind {
 ; X64-NEXT:    shrb %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    imull $211, %eax, %eax
-; X64-NEXT:    andl $24576, %eax # imm = 0x6000
 ; X64-NEXT:    shrl $13, %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %div = udiv i8 %x, 78
@@ -192,8 +192,8 @@ define i8 @test9(i8 %x) nounwind {
 ; X32-NEXT:    shrb $2, %al
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    imull $71, %eax, %eax
-; X32-NEXT:    andl $6144, %eax # imm = 0x1800
 ; X32-NEXT:    shrl $11, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X32-NEXT:    retl
 ;
@@ -202,8 +202,8 @@ define i8 @test9(i8 %x) nounwind {
 ; X64-NEXT:    shrb $2, %dil
 ; X64-NEXT:    movzbl %dil, %eax
 ; X64-NEXT:    imull $71, %eax, %eax
-; X64-NEXT:    andl $6144, %eax # imm = 0x1800
 ; X64-NEXT:    shrl $11, %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; X64-NEXT:    retq
   %div = udiv i8 %x, 116
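
In these divide-by-constant checks the andl-before-shrl pair becomes shrl
followed by a zero-extend. A small standalone sketch of why that is safe
(helper names are made up; this is an illustration, not the in-tree combine):

  #include <cassert>
  #include <cstdint>

  // Both operand orders keep the same bits: masking with C1 and then
  // shifting right by C2 equals shifting first and masking with C1 >> C2.
  static uint32_t maskThenShift(uint32_t x) { return (x & 0xFE00u) >> 9; }
  static uint32_t shiftThenMask(uint32_t x) { return (x >> 9) & (0xFE00u >> 9); }

  int main() {
    for (uint32_t x = 0; x <= 0xFFFF; ++x)
      assert(maskThenShift(x) == shiftThenMask(x));
    // test3 divides a zero-extended i8 by 3: the product 171 * x is at
    // most 171 * 255 = 43605 < 2^16, so after shrl $9 the residual mask
    // (0x7F) keeps every bit that can still be set, and the cheaper
    // 16-bit zero-extend (movzwl) in the checks covers it.
    for (uint32_t x = 0; x <= 0xFF; ++x) {
      uint32_t p = 171 * x;
      assert(((p >> 9) & 0x7Fu) == (p >> 9));
    }
    return 0;
  }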

Modified: llvm/trunk/test/CodeGen/X86/known-bits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/known-bits.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/known-bits.ll (original)
+++ llvm/trunk/test/CodeGen/X86/known-bits.ll Fri Sep 22 12:37:21 2017
@@ -12,8 +12,8 @@ define void @knownbits_zext_in_reg(i8*)
 ; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT:    movzbl (%eax), %eax
 ; X32-NEXT:    imull $101, %eax, %eax
-; X32-NEXT:    andl $16384, %eax # imm = 0x4000
 ; X32-NEXT:    shrl $14, %eax
+; X32-NEXT:    movzwl %ax, %eax
 ; X32-NEXT:    movzbl %al, %eax
 ; X32-NEXT:    vmovd %eax, %xmm0
 ; X32-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
@@ -50,8 +50,8 @@ define void @knownbits_zext_in_reg(i8*)
 ; X64:       # BB#0: # %BB
 ; X64-NEXT:    movzbl (%rdi), %eax
 ; X64-NEXT:    imull $101, %eax, %eax
-; X64-NEXT:    andl $16384, %eax # imm = 0x4000
 ; X64-NEXT:    shrl $14, %eax
+; X64-NEXT:    movzwl %ax, %eax
 ; X64-NEXT:    movzbl %al, %eax
 ; X64-NEXT:    vmovd %eax, %xmm0
 ; X64-NEXT:    vpshufb {{.*#+}} xmm0 = zero,zero,zero,zero,xmm0[0],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero

Modified: llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll (original)
+++ llvm/trunk/test/CodeGen/X86/live-out-reg-info.ll Fri Sep 22 12:37:21 2017
@@ -12,10 +12,9 @@ define void @foo(i32 %a) {
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:  .Lcfi0:
 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
-; CHECK-NEXT:    movl %edi, %eax
-; CHECK-NEXT:    shrl $23, %eax
-; CHECK-NEXT:    testb $1, %ah
-; CHECK-NEXT:    jne .LBB0_2
+; CHECK-NEXT:    shrl $23, %edi
+; CHECK-NEXT:    btl $8, %edi
+; CHECK-NEXT:    jb .LBB0_2
 ; CHECK-NEXT:  # BB#1: # %true
 ; CHECK-NEXT:    callq qux
 ; CHECK-NEXT:  .LBB0_2: # %false
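
The testb-through-%ah patterns in this file and the two that follow become
single-bit tests: BT copies the selected bit into the carry flag, so jb/jae
replaces the old jne/je. A quick model of the two forms (helper names are
made up, and BT's flag behavior is reduced to a bool here):

  #include <cassert>
  #include <cstdint>

  // testb $imm, %ah reads the second byte: it tests ((x >> 8) & 0xFF) & imm.
  static bool testbAH(uint32_t x, uint32_t imm) {
    return (((x >> 8) & 0xFFu) & imm) != 0;
  }
  // btl $n, %reg copies bit n of the full register into CF.
  static bool btl(uint32_t x, unsigned n) {
    return ((x >> n) & 1u) != 0;
  }

  int main() {
    for (uint64_t x = 0; x <= 0xFFFFFFFFull; x += 997) {
      uint32_t v = (uint32_t)x;
      assert(testbAH(v, 1) == btl(v, 8));    // live-out-reg-info.ll
      assert(testbAH(v, 8) == btl(v, 11));   // test-shrink.ll
      assert(testbAH(v, 64) == btl(v, 14));  // xor-icmp.ll
    }
    return 0;
  }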

Modified: llvm/trunk/test/CodeGen/X86/test-shrink.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/test-shrink.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/test-shrink.ll (original)
+++ llvm/trunk/test/CodeGen/X86/test-shrink.ll Fri Sep 22 12:37:21 2017
@@ -3,10 +3,10 @@
 ; RUN: llc < %s -mtriple=i686-- | FileCheck %s --check-prefix=CHECK-32
 
 ; CHECK-64-LABEL: g64xh:
-; CHECK-64:   testb $8, {{%ah|%ch}}
+; CHECK-64:   btl $11
 ; CHECK-64:   ret
 ; CHECK-32-LABEL: g64xh:
-; CHECK-32:   testb $8, %ah
+; CHECK-32:   btl $11
 ; CHECK-32:   ret
 define void @g64xh(i64 inreg %x) nounwind {
   %t = and i64 %x, 2048
@@ -37,10 +37,10 @@ no:
   ret void
 }
 ; CHECK-64-LABEL: g32xh:
-; CHECK-64:   testb $8, {{%ah|%ch}}
+; CHECK-64:   btl $11
 ; CHECK-64:   ret
 ; CHECK-32-LABEL: g32xh:
-; CHECK-32:   testb $8, %ah
+; CHECK-32:   btl $11
 ; CHECK-32:   ret
 define void @g32xh(i32 inreg %x) nounwind {
   %t = and i32 %x, 2048
@@ -71,10 +71,10 @@ no:
   ret void
 }
 ; CHECK-64-LABEL: g16xh:
-; CHECK-64:   testb $8, {{%ah|%ch}}
+; CHECK-64:   btl $11
 ; CHECK-64:   ret
 ; CHECK-32-LABEL: g16xh:
-; CHECK-32:   testb $8, %ah
+; CHECK-32:   btl $11
 ; CHECK-32:   ret
 define void @g16xh(i16 inreg %x) nounwind {
   %t = and i16 %x, 2048

Modified: llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll (original)
+++ llvm/trunk/test/CodeGen/X86/urem-i8-constant.ll Fri Sep 22 12:37:21 2017
@@ -1,13 +1,15 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i386-unknown-unknown | FileCheck %s
 
+; computeKnownBits determines that we don't need a mask op that is required in the general case.
+
 define i8 @foo(i8 %tmp325) {
 ; CHECK-LABEL: foo:
 ; CHECK:       # BB#0:
 ; CHECK-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; CHECK-NEXT:    imull $111, %ecx, %eax
-; CHECK-NEXT:    andl $28672, %eax # imm = 0x7000
 ; CHECK-NEXT:    shrl $12, %eax
+; CHECK-NEXT:    movzwl %ax, %eax
 ; CHECK-NEXT:    movb $37, %dl
 ; CHECK-NEXT:    # kill: %AL<def> %AL<kill> %EAX<kill>
 ; CHECK-NEXT:    mulb %dl
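
The new comment in this test can be checked by hand: 111 * 255 = 28305 is
below 2^15, so every bit the old 0x7000 mask could keep survives the shift
anyway. An exhaustive check over all i8 inputs (an illustration, not the
computeKnownBits logic):

  #include <cassert>
  #include <cstdint>

  int main() {
    for (uint32_t x = 0; x <= 0xFF; ++x) {
      uint32_t p = 111 * x;            // at most 111 * 255 = 28305 < 2^15
      // Old: (p & 0x7000) >> 12.  New: p >> 12 with no mask, since
      // p >> 12 <= 6 already fits in the three bits the mask kept.
      assert(((p & 0x7000u) >> 12) == (p >> 12));
    }
    return 0;
  }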

Modified: llvm/trunk/test/CodeGen/X86/xor-icmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xor-icmp.ll?rev=314023&r1=314022&r2=314023&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xor-icmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xor-icmp.ll Fri Sep 22 12:37:21 2017
@@ -17,15 +17,13 @@ define i32 @t(i32 %a, i32 %b) nounwind s
 ;
 ; X64-LABEL: t:
 ; X64:       # BB#0: # %entry
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    xorl %esi, %eax
-; X64-NEXT:    testb $64, %ah
-; X64-NEXT:    je .LBB0_1
-; X64-NEXT:  # BB#2: # %bb1
+; X64-NEXT:    xorl %esi, %edi
 ; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    btl $14, %edi
+; X64-NEXT:    jae .LBB0_1
+; X64-NEXT:  # BB#2: # %bb1
 ; X64-NEXT:    jmp bar # TAILCALL
 ; X64-NEXT:  .LBB0_1: # %bb
-; X64-NEXT:    xorl %eax, %eax
 ; X64-NEXT:    jmp foo # TAILCALL
 entry:
   %0 = and i32 %a, 16384
