[llvm] r279960 - [AVX512] In some cases the KORTEST instruction may be used instead of a ZEXT + TEST sequence.

Igor Breger via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 29 01:52:54 PDT 2016


Author: ibreger
Date: Mon Aug 29 03:52:52 2016
New Revision: 279960

URL: http://llvm.org/viewvc/llvm-project?rev=279960&view=rev
Log:
[AVX512] In some cases the KORTEST instruction may be used instead of a ZEXT + TEST sequence.

Differential Revision: http://reviews.llvm.org/D23490
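
For readers skimming the log: the motivating case is an i1 value that already lives in a mask (k) register, typically a single bit extracted from a vector-compare result. Before this change such a bit was moved to a GPR and tested there (kmovw + andl $1 + testb); now the compare of the i1 against zero can be selected as a single kortestw on the mask register, as the updated CHECK lines below show. A minimal illustrative IR case (not taken from the patch's test files; the function name is made up):

define void @branch_on_mask_bit(<16 x i32> %a, <16 x i32> %b, i32* %p) {
  ; The vector compare produces a <16 x i1> mask held in a k-register.
  %m = icmp sgt <16 x i32> %a, %b
  ; Branch on one bit of that mask.
  %bit = extractelement <16 x i1> %m, i32 4
  br i1 %bit, label %yes, label %done
yes:
  store i32 1, i32* %p
  br label %done
done:
  ret void
}
; Before (after the usual kshift extraction of the bit, roughly):
;   kmovw %k0, %eax ; andl $1, %eax ; testb %al, %al ; je ...
; After (roughly):
;   kortestw %k0, %k0 ; je ...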

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512-i1test.ll
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/CodeGen/X86/masked_memop.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Aug 29 03:52:52 2016
@@ -14900,15 +14900,29 @@ static SDValue EmitKTEST(SDValue Op, Sel
   return SDValue();
 }
 
-/// Emit nodes that will be selected as "test Op0,Op0", or something
-/// equivalent.
-SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
-                                    SelectionDAG &DAG) const {
-  if (Op.getValueType() == MVT::i1) {
+static SDValue EmitTEST_i1(SDValue Op, SelectionDAG &DAG, const SDLoc &dl) {
+
+  // Most probably the value is in GPR, use ZEXT + CMP.
+  if(Op.getOpcode() == ISD::TRUNCATE ||
+     Op.getOpcode() == ISD::LOAD ||
+     Op.getOpcode() == ISD::CopyFromReg) {
     SDValue ExtOp = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i8, Op);
     return DAG.getNode(X86ISD::CMP, dl, MVT::i32, ExtOp,
                        DAG.getConstant(0, dl, MVT::i8));
   }
+
+  // Create cmp i1 that should be mapped to KORTEST.
+  return DAG.getNode(X86ISD::CMP, dl, MVT::i1, Op,
+                     DAG.getConstant(0, dl, MVT::i8));
+}
+
+/// Emit nodes that will be selected as "test Op0,Op0", or something
+/// equivalent.
+SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
+                                    SelectionDAG &DAG) const {
+  if (Op.getValueType() == MVT::i1)
+    return EmitTEST_i1(Op, DAG, dl);
+
   // CF and OF aren't always set the way we want. Determine which
   // of these we need.
   bool NeedCF = false;
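
The split in the new EmitTEST_i1 helper is a heuristic: if the i1 operand comes from a TRUNCATE, a LOAD or a CopyFromReg, it is most likely already sitting in a GPR, so the existing ZEXT + CMP lowering (a GPR test) is kept; any other i1 is assumed to live in a mask register and gets an i1 X86ISD::CMP against zero, which the new TableGen pattern below selects as KORTEST. A sketch of the GPR side of that split (illustrative only, not part of the patch):

; The branch condition is produced by a truncate of a GPR value, so it is
; expected to stay in a GPR and be tested there rather than via kortestw.
define i32 @i1_from_gpr(i8 %x) {
  %c = trunc i8 %x to i1
  br i1 %c, label %t, label %f
t:
  ret i32 1
f:
  ret i32 0
}

A bit extracted from a vector compare, as in the example after the log message above, matches none of those three opcodes and therefore takes the new i1-CMP-to-KORTEST path.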

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Aug 29 03:52:52 2016
@@ -2476,6 +2476,10 @@ multiclass avx512_mask_testop_w<bits<8>
 defm KORTEST : avx512_mask_testop_w<0x98, "kortest", X86kortest>;
 defm KTEST   : avx512_mask_testop_w<0x99, "ktest", X86ktest, HasDQI>;
 
+def : Pat<(X86cmp VK1:$src, 0),
+          (KORTESTWrr (COPY_TO_REGCLASS VK1:$src, VK16),
+                      (COPY_TO_REGCLASS VK1:$src, VK16))>, Requires<[HasAVX512]>;
+
 // Mask shift
 multiclass avx512_mask_shiftop<bits<8> opc, string OpcodeStr, RegisterClass KRC,
                              SDNode OpNode> {
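
The pattern above maps an i1 X86cmp against zero onto KORTESTWrr, copying the VK1 source into VK16 and using it for both operands: kortestw sets ZF exactly when the OR of its operands is all zeros, so OR-ing the mask with itself reproduces the zero test that the GPR testb used to perform. A hypothetical reduced test in the style of the files updated below (the RUN line and CHECK lines are illustrative; the real coverage is in the existing tests):

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s

define i32 @select_on_mask_bit(<16 x i32> %a, <16 x i32> %b, i32 %x, i32 %y) {
; CHECK-LABEL: select_on_mask_bit:
; CHECK: kortestw %k0, %k0
  %m = icmp ult <16 x i32> %a, %b
  %c = extractelement <16 x i1> %m, i32 4
  %r = select i1 %c, i32 %x, i32 %y
  ret i32 %r
}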

Modified: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cmp.ll?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll Mon Aug 29 03:52:52 2016
@@ -167,9 +167,7 @@ define i32 @test10(i64 %b, i64 %c, i1 %d
 ; ALL-NEXT:    kmovw %eax, %k1
 ; ALL-NEXT:    korw %k1, %k0, %k1
 ; ALL-NEXT:    kxorw %k1, %k0, %k0
-; ALL-NEXT:    kmovw %k0, %eax
-; ALL-NEXT:    andl $1, %eax
-; ALL-NEXT:    testb %al, %al
+; ALL-NEXT:    kortestw %k0, %k0
 ; ALL-NEXT:    je LBB8_1
 ; ALL-NEXT:  ## BB#2: ## %if.end.i
 ; ALL-NEXT:    movl $6, %eax

Modified: llvm/trunk/test/CodeGen/X86/avx512-i1test.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-i1test.ll?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-i1test.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-i1test.ll Mon Aug 29 03:52:52 2016
@@ -8,23 +8,19 @@ target triple = "x86_64-unknown-linux-gn
 define void @func() {
 ; CHECK-LABEL: func:
 ; CHECK:       ## BB#0: ## %L_10
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    testb %al, %al
+; CHECK-NEXT:    kortestw %k0, %k0
 ; CHECK-NEXT:    je LBB0_1
-; CHECK-NEXT:  ## BB#4: ## %L_30
+; CHECK-NEXT:  ## BB#3: ## %L_30
 ; CHECK-NEXT:    retq
-; CHECK-NEXT:  LBB0_1: ## %bb56
-; CHECK-NEXT:    xorl %eax, %eax
-; CHECK-NEXT:    jmp LBB0_2
 ; CHECK-NEXT:    .p2align 4, 0x90
-; CHECK-NEXT:  LBB0_3: ## %bb35
-; CHECK-NEXT:    ## in Loop: Header=BB0_2 Depth=1
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:  LBB0_2: ## %bb33
+; CHECK-NEXT:  LBB0_2: ## %bb35
+; CHECK-NEXT:    ## in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT:    kortestw %k0, %k0
+; CHECK-NEXT:  LBB0_1: ## %bb33
 ; CHECK-NEXT:    ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    testb %al, %al
-; CHECK-NEXT:    jne LBB0_2
-; CHECK-NEXT:    jmp LBB0_3
+; CHECK-NEXT:    kortestw %k0, %k0
+; CHECK-NEXT:    jne LBB0_1
+; CHECK-NEXT:    jmp LBB0_2
 bb1:
   br i1 undef, label %L_10, label %L_10
 

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Mon Aug 29 03:52:52 2016
@@ -200,9 +200,7 @@ define <16 x i32> @test11(<16 x i32>%a,
 ; KNL-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
 ; KNL-NEXT:    kshiftlw $11, %k0, %k0
 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    kortestw %k0, %k0
 ; KNL-NEXT:    je LBB10_2
 ; KNL-NEXT:  ## BB#1: ## %A
 ; KNL-NEXT:    vmovdqa64 %zmm1, %zmm0
@@ -216,9 +214,7 @@ define <16 x i32> @test11(<16 x i32>%a,
 ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $11, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    je LBB10_2
 ; SKX-NEXT:  ## BB#1: ## %A
 ; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
@@ -244,9 +240,7 @@ define i64 @test12(<16 x i64>%a, <16 x i
 ; KNL-NEXT:    kunpckbw %k0, %k1, %k0
 ; KNL-NEXT:    kshiftlw $15, %k0, %k0
 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    kortestw %k0, %k0
 ; KNL-NEXT:    cmoveq %rsi, %rdi
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    retq
@@ -258,9 +252,7 @@ define i64 @test12(<16 x i64>%a, <16 x i
 ; SKX-NEXT:    kunpckbw %k0, %k1, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    cmoveq %rsi, %rdi
 ; SKX-NEXT:    movq %rdi, %rax
 ; SKX-NEXT:    retq
@@ -310,9 +302,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
 ; KNL-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; KNL-NEXT:    kshiftlw $11, %k0, %k0
 ; KNL-NEXT:    kshiftrw $15, %k0, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    kortestw %k0, %k0
 ; KNL-NEXT:    cmoveq %rsi, %rdi
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    retq
@@ -322,9 +312,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; SKX-NEXT:    kshiftlb $3, %k0, %k0
 ; SKX-NEXT:    kshiftrb $7, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    cmoveq %rsi, %rdi
 ; SKX-NEXT:    movq %rdi, %rax
 ; SKX-NEXT:    retq
@@ -1356,9 +1344,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    addb $3, %al
 ; SKX-NEXT:    movzbl %al, %eax
@@ -1438,9 +1424,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftrq $63, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    sete %al
 ; SKX-NEXT:    addb $3, %al
 ; SKX-NEXT:    movzbl %al, %eax

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Aug 29 03:52:52 2016
@@ -1648,38 +1648,32 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
 ; SKX-NEXT:    vpmovsxdq %xmm1, %ymm1
 ; SKX-NEXT:    vpsllq $2, %ymm1, %ymm1
-; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    # implicit-def: %XMM1
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm1
+; SKX-NEXT:    kortestw %k0, %k0
+; SKX-NEXT:    # implicit-def: %XMM0
 ; SKX-NEXT:    je .LBB29_2
 ; SKX-NEXT:  # BB#1: # %cond.load
-; SKX-NEXT:    vmovq %xmm0, %rax
-; SKX-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SKX-NEXT:    vmovq %xmm1, %rax
+; SKX-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX-NEXT:  .LBB29_2: # %else
 ; SKX-NEXT:    kshiftlw $14, %k1, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    je .LBB29_4
 ; SKX-NEXT:  # BB#3: # %cond.load1
-; SKX-NEXT:    vpextrq $1, %xmm0, %rax
-; SKX-NEXT:    vpinsrd $1, (%rax), %xmm1, %xmm1
+; SKX-NEXT:    vpextrq $1, %xmm1, %rax
+; SKX-NEXT:    vpinsrd $1, (%rax), %xmm0, %xmm0
 ; SKX-NEXT:  .LBB29_4: # %else2
 ; SKX-NEXT:    kshiftlw $13, %k1, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    kortestw %k0, %k0
 ; SKX-NEXT:    je .LBB29_6
 ; SKX-NEXT:  # BB#5: # %cond.load4
-; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0
-; SKX-NEXT:    vmovq %xmm0, %rax
-; SKX-NEXT:    vpinsrd $2, (%rax), %xmm1, %xmm1
+; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm1
+; SKX-NEXT:    vmovq %xmm1, %rax
+; SKX-NEXT:    vpinsrd $2, (%rax), %xmm0, %xmm0
 ; SKX-NEXT:  .LBB29_6: # %else5
-; SKX-NEXT:    vpblendmd %xmm1, %xmm3, %xmm0 {%k1}
+; SKX-NEXT:    vpblendmd %xmm0, %xmm3, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 ;
 ; SKX_32-LABEL: test30:
@@ -1692,38 +1686,32 @@ define <3 x i32> @test30(<3 x i32*> %bas
 ; SKX_32-NEXT:    kshiftlw $15, %k1, %k0
 ; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
 ; SKX_32-NEXT:    vpslld $2, %xmm1, %xmm1
-; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    # implicit-def: %XMM1
-; SKX_32-NEXT:    testb %al, %al
+; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm1
+; SKX_32-NEXT:    kortestw %k0, %k0
+; SKX_32-NEXT:    # implicit-def: %XMM0
 ; SKX_32-NEXT:    je .LBB29_2
 ; SKX_32-NEXT:  # BB#1: # %cond.load
-; SKX_32-NEXT:    vmovd %xmm0, %eax
-; SKX_32-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SKX_32-NEXT:    vmovd %xmm1, %eax
+; SKX_32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
 ; SKX_32-NEXT:  .LBB29_2: # %else
 ; SKX_32-NEXT:    kshiftlw $14, %k1, %k0
 ; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    testb %al, %al
+; SKX_32-NEXT:    kortestw %k0, %k0
 ; SKX_32-NEXT:    je .LBB29_4
 ; SKX_32-NEXT:  # BB#3: # %cond.load1
-; SKX_32-NEXT:    vpextrd $1, %xmm0, %eax
-; SKX_32-NEXT:    vpinsrd $1, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT:    vpextrd $1, %xmm1, %eax
+; SKX_32-NEXT:    vpinsrd $1, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT:  .LBB29_4: # %else2
 ; SKX_32-NEXT:    vmovdqa32 {{[0-9]+}}(%esp), %xmm2
 ; SKX_32-NEXT:    kshiftlw $13, %k1, %k0
 ; SKX_32-NEXT:    kshiftrw $15, %k0, %k0
-; SKX_32-NEXT:    kmovw %k0, %eax
-; SKX_32-NEXT:    andl $1, %eax
-; SKX_32-NEXT:    testb %al, %al
+; SKX_32-NEXT:    kortestw %k0, %k0
 ; SKX_32-NEXT:    je .LBB29_6
 ; SKX_32-NEXT:  # BB#5: # %cond.load4
-; SKX_32-NEXT:    vpextrd $2, %xmm0, %eax
-; SKX_32-NEXT:    vpinsrd $2, (%eax), %xmm1, %xmm1
+; SKX_32-NEXT:    vpextrd $2, %xmm1, %eax
+; SKX_32-NEXT:    vpinsrd $2, (%eax), %xmm0, %xmm0
 ; SKX_32-NEXT:  .LBB29_6: # %else5
-; SKX_32-NEXT:    vpblendmd %xmm1, %xmm2, %xmm0 {%k1}
+; SKX_32-NEXT:    vpblendmd %xmm0, %xmm2, %xmm0 {%k1}
 ; SKX_32-NEXT:    addl $12, %esp
 ; SKX_32-NEXT:    retl
 

Modified: llvm/trunk/test/CodeGen/X86/masked_memop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_memop.ll?rev=279960&r1=279959&r2=279960&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_memop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_memop.ll Mon Aug 29 03:52:52 2016
@@ -2345,10 +2345,8 @@ define <16 x i8> @test_mask_load_16xi8(<
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    ## implicit-def: %XMM0
-; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    je LBB50_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.load
 ; AVX512F-NEXT:    movzbl (%rdi), %eax
@@ -2356,134 +2354,104 @@ define <16 x i8> @test_mask_load_16xi8(<
 ; AVX512F-NEXT:  LBB50_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.load1
 ; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.load4
 ; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_6: ## %else5
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.load7
 ; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_8: ## %else8
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.load10
 ; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_10: ## %else11
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.load13
 ; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_12: ## %else14
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.load16
 ; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_14: ## %else17
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.load19
 ; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_16: ## %else20
 ; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.load22
 ; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_18: ## %else23
 ; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.load25
 ; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_20: ## %else26
 ; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.load28
 ; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_22: ## %else29
 ; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.load31
 ; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_24: ## %else32
 ; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.load34
 ; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_26: ## %else35
 ; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.load37
 ; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_28: ## %else38
 ; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.load40
 ; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB50_30: ## %else41
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB50_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.load43
 ; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm0
@@ -4628,9 +4596,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.load
 ; AVX512F-NEXT:    movzbl (%rdi), %eax
@@ -4639,9 +4605,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.load1
 ; AVX512F-NEXT:    vpinsrb $1, 1(%rdi), %xmm0, %xmm6
@@ -4650,9 +4614,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.load4
 ; AVX512F-NEXT:    vpinsrb $2, 2(%rdi), %xmm0, %xmm6
@@ -4661,9 +4623,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.load7
 ; AVX512F-NEXT:    vpinsrb $3, 3(%rdi), %xmm0, %xmm6
@@ -4672,9 +4632,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.load10
 ; AVX512F-NEXT:    vpinsrb $4, 4(%rdi), %xmm0, %xmm6
@@ -4683,9 +4641,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.load13
 ; AVX512F-NEXT:    vpinsrb $5, 5(%rdi), %xmm0, %xmm6
@@ -4694,9 +4650,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.load16
 ; AVX512F-NEXT:    vpinsrb $6, 6(%rdi), %xmm0, %xmm6
@@ -4705,9 +4659,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.load19
 ; AVX512F-NEXT:    vpinsrb $7, 7(%rdi), %xmm0, %xmm6
@@ -4716,9 +4668,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.load22
 ; AVX512F-NEXT:    vpinsrb $8, 8(%rdi), %xmm0, %xmm6
@@ -4727,9 +4677,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, (%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.load25
 ; AVX512F-NEXT:    vpinsrb $9, 9(%rdi), %xmm0, %xmm6
@@ -4738,9 +4686,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.load28
 ; AVX512F-NEXT:    vpinsrb $10, 10(%rdi), %xmm0, %xmm6
@@ -4749,9 +4695,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.load31
 ; AVX512F-NEXT:    vpinsrb $11, 11(%rdi), %xmm0, %xmm6
@@ -4760,9 +4704,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.load34
 ; AVX512F-NEXT:    vpinsrb $12, 12(%rdi), %xmm0, %xmm6
@@ -4772,9 +4714,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.load37
 ; AVX512F-NEXT:    vpinsrb $13, 13(%rdi), %xmm0, %xmm6
@@ -4784,9 +4724,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.load40
 ; AVX512F-NEXT:    vpinsrb $14, 14(%rdi), %xmm0, %xmm6
@@ -4795,9 +4733,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.load43
 ; AVX512F-NEXT:    vpinsrb $15, 15(%rdi), %xmm0, %xmm1
@@ -4806,9 +4742,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_34
 ; AVX512F-NEXT:  ## BB#33: ## %cond.load46
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4818,9 +4752,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_36
 ; AVX512F-NEXT:  ## BB#35: ## %cond.load49
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4830,9 +4762,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_38
 ; AVX512F-NEXT:  ## BB#37: ## %cond.load52
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4842,9 +4772,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_40
 ; AVX512F-NEXT:  ## BB#39: ## %cond.load55
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4854,9 +4782,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_42
 ; AVX512F-NEXT:  ## BB#41: ## %cond.load58
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4866,9 +4792,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_44
 ; AVX512F-NEXT:  ## BB#43: ## %cond.load61
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4878,9 +4802,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_46
 ; AVX512F-NEXT:  ## BB#45: ## %cond.load64
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4890,9 +4812,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_48
 ; AVX512F-NEXT:  ## BB#47: ## %cond.load67
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4902,9 +4822,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_50
 ; AVX512F-NEXT:  ## BB#49: ## %cond.load70
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4914,9 +4832,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_52
 ; AVX512F-NEXT:  ## BB#51: ## %cond.load73
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4926,9 +4842,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_54
 ; AVX512F-NEXT:  ## BB#53: ## %cond.load76
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4938,9 +4852,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_56
 ; AVX512F-NEXT:  ## BB#55: ## %cond.load79
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4950,9 +4862,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_58
 ; AVX512F-NEXT:  ## BB#57: ## %cond.load82
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -4963,9 +4873,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_60
 ; AVX512F-NEXT:  ## BB#59: ## %cond.load85
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
@@ -4976,9 +4884,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_62
 ; AVX512F-NEXT:  ## BB#61: ## %cond.load88
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm2
@@ -4988,9 +4894,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    vptestmd %zmm1, %zmm1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_64
 ; AVX512F-NEXT:  ## BB#63: ## %cond.load91
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -5000,9 +4904,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_66
 ; AVX512F-NEXT:  ## BB#65: ## %cond.load94
 ; AVX512F-NEXT:    vpinsrb $0, 32(%rdi), %xmm0, %xmm1
@@ -5011,9 +4913,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_68
 ; AVX512F-NEXT:  ## BB#67: ## %cond.load97
 ; AVX512F-NEXT:    vpinsrb $1, 33(%rdi), %xmm1, %xmm2
@@ -5022,9 +4922,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_70
 ; AVX512F-NEXT:  ## BB#69: ## %cond.load100
 ; AVX512F-NEXT:    vpinsrb $2, 34(%rdi), %xmm1, %xmm2
@@ -5033,9 +4931,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_72
 ; AVX512F-NEXT:  ## BB#71: ## %cond.load103
 ; AVX512F-NEXT:    vpinsrb $3, 35(%rdi), %xmm1, %xmm2
@@ -5044,9 +4940,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_74
 ; AVX512F-NEXT:  ## BB#73: ## %cond.load106
 ; AVX512F-NEXT:    vpinsrb $4, 36(%rdi), %xmm1, %xmm2
@@ -5055,9 +4949,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_76
 ; AVX512F-NEXT:  ## BB#75: ## %cond.load109
 ; AVX512F-NEXT:    vpinsrb $5, 37(%rdi), %xmm1, %xmm2
@@ -5066,9 +4958,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_78
 ; AVX512F-NEXT:  ## BB#77: ## %cond.load112
 ; AVX512F-NEXT:    vpinsrb $6, 38(%rdi), %xmm1, %xmm2
@@ -5077,9 +4967,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_80
 ; AVX512F-NEXT:  ## BB#79: ## %cond.load115
 ; AVX512F-NEXT:    vpinsrb $7, 39(%rdi), %xmm1, %xmm2
@@ -5088,9 +4976,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_82
 ; AVX512F-NEXT:  ## BB#81: ## %cond.load118
 ; AVX512F-NEXT:    vpinsrb $8, 40(%rdi), %xmm1, %xmm2
@@ -5099,9 +4985,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_84
 ; AVX512F-NEXT:  ## BB#83: ## %cond.load121
 ; AVX512F-NEXT:    vpinsrb $9, 41(%rdi), %xmm1, %xmm2
@@ -5110,9 +4994,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_86
 ; AVX512F-NEXT:  ## BB#85: ## %cond.load124
 ; AVX512F-NEXT:    vpinsrb $10, 42(%rdi), %xmm1, %xmm2
@@ -5121,9 +5003,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_88
 ; AVX512F-NEXT:  ## BB#87: ## %cond.load127
 ; AVX512F-NEXT:    vpinsrb $11, 43(%rdi), %xmm1, %xmm2
@@ -5132,9 +5012,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_90
 ; AVX512F-NEXT:  ## BB#89: ## %cond.load130
 ; AVX512F-NEXT:    vpinsrb $12, 44(%rdi), %xmm1, %xmm2
@@ -5144,9 +5022,7 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_92
 ; AVX512F-NEXT:  ## BB#91: ## %cond.load133
 ; AVX512F-NEXT:    vpinsrb $13, 45(%rdi), %xmm1, %xmm3
@@ -5156,203 +5032,167 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512F-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB52_94
 ; AVX512F-NEXT:  ## BB#93: ## %cond.load136
 ; AVX512F-NEXT:    vpinsrb $14, 46(%rdi), %xmm1, %xmm3
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm3[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512F-NEXT:  LBB52_94: ## %else137
-; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k5
+; AVX512F-NEXT:    vptestmd %zmm2, %zmm2, %k7
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_96
 ; AVX512F-NEXT:  ## BB#95: ## %cond.load139
 ; AVX512F-NEXT:    vpinsrb $15, 47(%rdi), %xmm1, %xmm2
 ; AVX512F-NEXT:    vpblendd {{.*#+}} ymm1 = ymm2[0,1,2,3],ymm1[4,5,6,7]
 ; AVX512F-NEXT:  LBB52_96: ## %else140
-; AVX512F-NEXT:    kshiftlw $15, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $15, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_98
 ; AVX512F-NEXT:  ## BB#97: ## %cond.load142
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $0, 48(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_98: ## %else143
-; AVX512F-NEXT:    kshiftlw $14, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $14, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_100
 ; AVX512F-NEXT:  ## BB#99: ## %cond.load145
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $1, 49(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_100: ## %else146
-; AVX512F-NEXT:    kshiftlw $13, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $13, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_102
 ; AVX512F-NEXT:  ## BB#101: ## %cond.load148
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $2, 50(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_102: ## %else149
-; AVX512F-NEXT:    kshiftlw $12, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $12, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_104
 ; AVX512F-NEXT:  ## BB#103: ## %cond.load151
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $3, 51(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_104: ## %else152
-; AVX512F-NEXT:    kshiftlw $11, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $11, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_106
 ; AVX512F-NEXT:  ## BB#105: ## %cond.load154
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $4, 52(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_106: ## %else155
-; AVX512F-NEXT:    kshiftlw $10, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $10, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_108
 ; AVX512F-NEXT:  ## BB#107: ## %cond.load157
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $5, 53(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_108: ## %else158
-; AVX512F-NEXT:    kshiftlw $9, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $9, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_110
 ; AVX512F-NEXT:  ## BB#109: ## %cond.load160
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $6, 54(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_110: ## %else161
-; AVX512F-NEXT:    kshiftlw $8, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $8, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_112
 ; AVX512F-NEXT:  ## BB#111: ## %cond.load163
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $7, 55(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_112: ## %else164
-; AVX512F-NEXT:    kshiftlw $7, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $7, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
 ; AVX512F-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp) ## 2-byte Spill
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_114
 ; AVX512F-NEXT:  ## BB#113: ## %cond.load166
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $8, 56(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_114: ## %else167
-; AVX512F-NEXT:    kshiftlw $6, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $6, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k2
-; AVX512F-NEXT:    kmovw %k2, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k2, %k2
 ; AVX512F-NEXT:    je LBB52_116
 ; AVX512F-NEXT:  ## BB#115: ## %cond.load169
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $9, 57(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_116: ## %else170
-; AVX512F-NEXT:    kshiftlw $5, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $5, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k3
-; AVX512F-NEXT:    kmovw %k3, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k3, %k3
 ; AVX512F-NEXT:    je LBB52_118
 ; AVX512F-NEXT:  ## BB#117: ## %cond.load172
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $10, 58(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_118: ## %else173
-; AVX512F-NEXT:    kshiftlw $4, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $4, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k4
-; AVX512F-NEXT:    kmovw %k4, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k4, %k4
 ; AVX512F-NEXT:    je LBB52_120
 ; AVX512F-NEXT:  ## BB#119: ## %cond.load175
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $11, 59(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_120: ## %else176
-; AVX512F-NEXT:    kshiftlw $3, %k5, %k0
-; AVX512F-NEXT:    kshiftrw $15, %k0, %k6
-; AVX512F-NEXT:    kmovw %k6, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kshiftlw $3, %k7, %k0
+; AVX512F-NEXT:    kshiftrw $15, %k0, %k5
+; AVX512F-NEXT:    kortestw %k5, %k5
 ; AVX512F-NEXT:    je LBB52_122
 ; AVX512F-NEXT:  ## BB#121: ## %cond.load178
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $12, 60(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_122: ## %else179
-; AVX512F-NEXT:    kshiftlw $2, %k5, %k0
-; AVX512F-NEXT:    kshiftrw $15, %k0, %k7
-; AVX512F-NEXT:    kmovw %k7, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kshiftlw $2, %k7, %k0
+; AVX512F-NEXT:    kshiftrw $15, %k0, %k6
+; AVX512F-NEXT:    kortestw %k6, %k6
 ; AVX512F-NEXT:    je LBB52_124
 ; AVX512F-NEXT:  ## BB#123: ## %cond.load181
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $13, 61(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_124: ## %else182
-; AVX512F-NEXT:    kshiftlw $1, %k5, %k0
+; AVX512F-NEXT:    kshiftlw $1, %k7, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB52_126
 ; AVX512F-NEXT:  ## BB#125: ## %cond.load184
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
 ; AVX512F-NEXT:    vpinsrb $14, 62(%rdi), %xmm2, %xmm2
 ; AVX512F-NEXT:    vinserti128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT:  LBB52_126: ## %else185
-; AVX512F-NEXT:    kshiftrw $15, %k5, %k5
-; AVX512F-NEXT:    kmovw %k5, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kshiftrw $15, %k7, %k7
+; AVX512F-NEXT:    kortestw %k7, %k7
 ; AVX512F-NEXT:    je LBB52_128
 ; AVX512F-NEXT:  ## BB#127: ## %cond.load187
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm2
@@ -5486,10 +5326,10 @@ define <64 x i8> @test_mask_load_64xi8(<
 ; AVX512F-NEXT:    movl %eax, {{[0-9]+}}(%rsp) ## 4-byte Spill
 ; AVX512F-NEXT:    kmovw %k3, %r12d
 ; AVX512F-NEXT:    kmovw %k4, %r15d
-; AVX512F-NEXT:    kmovw %k6, %r14d
-; AVX512F-NEXT:    kmovw %k7, %ebx
+; AVX512F-NEXT:    kmovw %k5, %r14d
+; AVX512F-NEXT:    kmovw %k6, %ebx
 ; AVX512F-NEXT:    kmovw %k0, %r11d
-; AVX512F-NEXT:    kmovw %k5, %r10d
+; AVX512F-NEXT:    kmovw %k7, %r10d
 ; AVX512F-NEXT:    kmovw -{{[0-9]+}}(%rsp), %k0 ## 2-byte Reload
 ; AVX512F-NEXT:    kmovw %k0, %eax
 ; AVX512F-NEXT:    movl %eax, -{{[0-9]+}}(%rsp) ## 4-byte Spill
@@ -5688,10 +5528,8 @@ define <8 x i16> @test_mask_load_8xi16(<
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    ## implicit-def: %XMM0
-; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    je LBB53_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.load
 ; AVX512F-NEXT:    movzwl (%rdi), %eax
@@ -5699,63 +5537,49 @@ define <8 x i16> @test_mask_load_8xi16(<
 ; AVX512F-NEXT:  LBB53_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.load1
 ; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.load4
 ; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_6: ## %else5
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.load7
 ; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_8: ## %else8
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.load10
 ; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_10: ## %else11
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.load13
 ; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_12: ## %else14
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.load16
 ; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm0
 ; AVX512F-NEXT:  LBB53_14: ## %else17
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB53_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.load19
 ; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm0
@@ -6050,10 +5874,8 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    ## implicit-def: %YMM0
-; AVX512F-NEXT:    testb %al, %al
 ; AVX512F-NEXT:    je LBB54_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.load
 ; AVX512F-NEXT:    movzwl (%rdi), %eax
@@ -6061,9 +5883,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.load1
 ; AVX512F-NEXT:    vpinsrw $1, 2(%rdi), %xmm0, %xmm1
@@ -6071,9 +5891,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.load4
 ; AVX512F-NEXT:    vpinsrw $2, 4(%rdi), %xmm0, %xmm1
@@ -6081,9 +5899,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_6: ## %else5
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.load7
 ; AVX512F-NEXT:    vpinsrw $3, 6(%rdi), %xmm0, %xmm1
@@ -6091,9 +5907,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_8: ## %else8
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.load10
 ; AVX512F-NEXT:    vpinsrw $4, 8(%rdi), %xmm0, %xmm1
@@ -6101,9 +5915,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_10: ## %else11
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.load13
 ; AVX512F-NEXT:    vpinsrw $5, 10(%rdi), %xmm0, %xmm1
@@ -6111,9 +5923,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_12: ## %else14
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.load16
 ; AVX512F-NEXT:    vpinsrw $6, 12(%rdi), %xmm0, %xmm1
@@ -6121,9 +5931,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_14: ## %else17
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.load19
 ; AVX512F-NEXT:    vpinsrw $7, 14(%rdi), %xmm0, %xmm1
@@ -6131,9 +5939,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_16: ## %else20
 ; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.load22
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6142,9 +5948,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_18: ## %else23
 ; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.load25
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6153,9 +5957,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_20: ## %else26
 ; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.load28
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6164,9 +5966,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_22: ## %else29
 ; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.load31
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6175,9 +5975,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_24: ## %else32
 ; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.load34
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6186,9 +5984,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_26: ## %else35
 ; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.load37
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6197,9 +5993,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:  LBB54_28: ## %else38
 ; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.load40
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -6207,9 +6001,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
 ; AVX512F-NEXT:  LBB54_30: ## %else41
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB54_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.load43
 ; AVX512F-NEXT:    vextracti128 $1, %ymm0, %xmm1
@@ -7125,143 +6917,111 @@ define void @test_mask_store_16xi8(<16 x
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.store
 ; AVX512F-NEXT:    vpextrb $0, %xmm1, (%rdi)
 ; AVX512F-NEXT:  LBB56_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.store1
 ; AVX512F-NEXT:    vpextrb $1, %xmm1, 1(%rdi)
 ; AVX512F-NEXT:  LBB56_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.store3
 ; AVX512F-NEXT:    vpextrb $2, %xmm1, 2(%rdi)
 ; AVX512F-NEXT:  LBB56_6: ## %else4
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.store5
 ; AVX512F-NEXT:    vpextrb $3, %xmm1, 3(%rdi)
 ; AVX512F-NEXT:  LBB56_8: ## %else6
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.store7
 ; AVX512F-NEXT:    vpextrb $4, %xmm1, 4(%rdi)
 ; AVX512F-NEXT:  LBB56_10: ## %else8
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.store9
 ; AVX512F-NEXT:    vpextrb $5, %xmm1, 5(%rdi)
 ; AVX512F-NEXT:  LBB56_12: ## %else10
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.store11
 ; AVX512F-NEXT:    vpextrb $6, %xmm1, 6(%rdi)
 ; AVX512F-NEXT:  LBB56_14: ## %else12
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.store13
 ; AVX512F-NEXT:    vpextrb $7, %xmm1, 7(%rdi)
 ; AVX512F-NEXT:  LBB56_16: ## %else14
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.store15
 ; AVX512F-NEXT:    vpextrb $8, %xmm1, 8(%rdi)
 ; AVX512F-NEXT:  LBB56_18: ## %else16
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.store17
 ; AVX512F-NEXT:    vpextrb $9, %xmm1, 9(%rdi)
 ; AVX512F-NEXT:  LBB56_20: ## %else18
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.store19
 ; AVX512F-NEXT:    vpextrb $10, %xmm1, 10(%rdi)
 ; AVX512F-NEXT:  LBB56_22: ## %else20
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.store21
 ; AVX512F-NEXT:    vpextrb $11, %xmm1, 11(%rdi)
 ; AVX512F-NEXT:  LBB56_24: ## %else22
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.store23
 ; AVX512F-NEXT:    vpextrb $12, %xmm1, 12(%rdi)
 ; AVX512F-NEXT:  LBB56_26: ## %else24
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.store25
 ; AVX512F-NEXT:    vpextrb $13, %xmm1, 13(%rdi)
 ; AVX512F-NEXT:  LBB56_28: ## %else26
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB56_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.store27
 ; AVX512F-NEXT:    vpextrb $14, %xmm1, 14(%rdi)
 ; AVX512F-NEXT:  LBB56_30: ## %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB56_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.store29
 ; AVX512F-NEXT:    vpextrb $15, %xmm1, 15(%rdi)
@@ -8772,117 +8532,91 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.store
 ; AVX512F-NEXT:    vpextrb $0, %xmm4, (%rdi)
 ; AVX512F-NEXT:  LBB58_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.store1
 ; AVX512F-NEXT:    vpextrb $1, %xmm4, 1(%rdi)
 ; AVX512F-NEXT:  LBB58_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.store3
 ; AVX512F-NEXT:    vpextrb $2, %xmm4, 2(%rdi)
 ; AVX512F-NEXT:  LBB58_6: ## %else4
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.store5
 ; AVX512F-NEXT:    vpextrb $3, %xmm4, 3(%rdi)
 ; AVX512F-NEXT:  LBB58_8: ## %else6
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.store7
 ; AVX512F-NEXT:    vpextrb $4, %xmm4, 4(%rdi)
 ; AVX512F-NEXT:  LBB58_10: ## %else8
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.store9
 ; AVX512F-NEXT:    vpextrb $5, %xmm4, 5(%rdi)
 ; AVX512F-NEXT:  LBB58_12: ## %else10
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.store11
 ; AVX512F-NEXT:    vpextrb $6, %xmm4, 6(%rdi)
 ; AVX512F-NEXT:  LBB58_14: ## %else12
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.store13
 ; AVX512F-NEXT:    vpextrb $7, %xmm4, 7(%rdi)
 ; AVX512F-NEXT:  LBB58_16: ## %else14
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.store15
 ; AVX512F-NEXT:    vpextrb $8, %xmm4, 8(%rdi)
 ; AVX512F-NEXT:  LBB58_18: ## %else16
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.store17
 ; AVX512F-NEXT:    vpextrb $9, %xmm4, 9(%rdi)
 ; AVX512F-NEXT:  LBB58_20: ## %else18
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.store19
 ; AVX512F-NEXT:    vpextrb $10, %xmm4, 10(%rdi)
 ; AVX512F-NEXT:  LBB58_22: ## %else20
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.store21
 ; AVX512F-NEXT:    vpextrb $11, %xmm4, 11(%rdi)
 ; AVX512F-NEXT:  LBB58_24: ## %else22
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.store23
 ; AVX512F-NEXT:    vpextrb $12, %xmm4, 12(%rdi)
@@ -8890,9 +8624,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpmovsxbd %xmm1, %zmm0
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.store25
 ; AVX512F-NEXT:    vpextrb $13, %xmm4, 13(%rdi)
@@ -8900,27 +8632,21 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.store27
 ; AVX512F-NEXT:    vpextrb $14, %xmm4, 14(%rdi)
 ; AVX512F-NEXT:  LBB58_30: ## %else28
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.store29
 ; AVX512F-NEXT:    vpextrb $15, %xmm4, 15(%rdi)
 ; AVX512F-NEXT:  LBB58_32: ## %else30
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_34
 ; AVX512F-NEXT:  ## BB#33: ## %cond.store31
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8928,9 +8654,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_34: ## %else32
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_36
 ; AVX512F-NEXT:  ## BB#35: ## %cond.store33
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8938,9 +8662,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_36: ## %else34
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_38
 ; AVX512F-NEXT:  ## BB#37: ## %cond.store35
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8948,9 +8670,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_38: ## %else36
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_40
 ; AVX512F-NEXT:  ## BB#39: ## %cond.store37
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8958,9 +8678,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_40: ## %else38
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_42
 ; AVX512F-NEXT:  ## BB#41: ## %cond.store39
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8968,9 +8686,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_42: ## %else40
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_44
 ; AVX512F-NEXT:  ## BB#43: ## %cond.store41
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8978,9 +8694,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_44: ## %else42
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_46
 ; AVX512F-NEXT:  ## BB#45: ## %cond.store43
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8988,9 +8702,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_46: ## %else44
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_48
 ; AVX512F-NEXT:  ## BB#47: ## %cond.store45
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -8998,9 +8710,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_48: ## %else46
 ; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_50
 ; AVX512F-NEXT:  ## BB#49: ## %cond.store47
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9008,9 +8718,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_50: ## %else48
 ; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_52
 ; AVX512F-NEXT:  ## BB#51: ## %cond.store49
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9018,9 +8726,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_52: ## %else50
 ; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_54
 ; AVX512F-NEXT:  ## BB#53: ## %cond.store51
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9028,9 +8734,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_54: ## %else52
 ; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_56
 ; AVX512F-NEXT:  ## BB#55: ## %cond.store53
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9038,9 +8742,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_56: ## %else54
 ; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_58
 ; AVX512F-NEXT:  ## BB#57: ## %cond.store55
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9049,9 +8751,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpmovsxbd %xmm2, %zmm0
 ; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_60
 ; AVX512F-NEXT:  ## BB#59: ## %cond.store57
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
@@ -9060,9 +8760,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_62
 ; AVX512F-NEXT:  ## BB#61: ## %cond.store59
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm1
@@ -9070,9 +8768,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_62: ## %else60
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_64
 ; AVX512F-NEXT:  ## BB#63: ## %cond.store61
 ; AVX512F-NEXT:    vextracti128 $1, %ymm4, %xmm0
@@ -9080,117 +8776,91 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_64: ## %else62
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_66
 ; AVX512F-NEXT:  ## BB#65: ## %cond.store63
 ; AVX512F-NEXT:    vpextrb $0, %xmm5, 32(%rdi)
 ; AVX512F-NEXT:  LBB58_66: ## %else64
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_68
 ; AVX512F-NEXT:  ## BB#67: ## %cond.store65
 ; AVX512F-NEXT:    vpextrb $1, %xmm5, 33(%rdi)
 ; AVX512F-NEXT:  LBB58_68: ## %else66
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_70
 ; AVX512F-NEXT:  ## BB#69: ## %cond.store67
 ; AVX512F-NEXT:    vpextrb $2, %xmm5, 34(%rdi)
 ; AVX512F-NEXT:  LBB58_70: ## %else68
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_72
 ; AVX512F-NEXT:  ## BB#71: ## %cond.store69
 ; AVX512F-NEXT:    vpextrb $3, %xmm5, 35(%rdi)
 ; AVX512F-NEXT:  LBB58_72: ## %else70
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_74
 ; AVX512F-NEXT:  ## BB#73: ## %cond.store71
 ; AVX512F-NEXT:    vpextrb $4, %xmm5, 36(%rdi)
 ; AVX512F-NEXT:  LBB58_74: ## %else72
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_76
 ; AVX512F-NEXT:  ## BB#75: ## %cond.store73
 ; AVX512F-NEXT:    vpextrb $5, %xmm5, 37(%rdi)
 ; AVX512F-NEXT:  LBB58_76: ## %else74
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_78
 ; AVX512F-NEXT:  ## BB#77: ## %cond.store75
 ; AVX512F-NEXT:    vpextrb $6, %xmm5, 38(%rdi)
 ; AVX512F-NEXT:  LBB58_78: ## %else76
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_80
 ; AVX512F-NEXT:  ## BB#79: ## %cond.store77
 ; AVX512F-NEXT:    vpextrb $7, %xmm5, 39(%rdi)
 ; AVX512F-NEXT:  LBB58_80: ## %else78
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_82
 ; AVX512F-NEXT:  ## BB#81: ## %cond.store79
 ; AVX512F-NEXT:    vpextrb $8, %xmm5, 40(%rdi)
 ; AVX512F-NEXT:  LBB58_82: ## %else80
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_84
 ; AVX512F-NEXT:  ## BB#83: ## %cond.store81
 ; AVX512F-NEXT:    vpextrb $9, %xmm5, 41(%rdi)
 ; AVX512F-NEXT:  LBB58_84: ## %else82
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_86
 ; AVX512F-NEXT:  ## BB#85: ## %cond.store83
 ; AVX512F-NEXT:    vpextrb $10, %xmm5, 42(%rdi)
 ; AVX512F-NEXT:  LBB58_86: ## %else84
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_88
 ; AVX512F-NEXT:  ## BB#87: ## %cond.store85
 ; AVX512F-NEXT:    vpextrb $11, %xmm5, 43(%rdi)
 ; AVX512F-NEXT:  LBB58_88: ## %else86
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_90
 ; AVX512F-NEXT:  ## BB#89: ## %cond.store87
 ; AVX512F-NEXT:    vpextrb $12, %xmm5, 44(%rdi)
@@ -9198,9 +8868,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpmovsxbd %xmm3, %zmm0
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_92
 ; AVX512F-NEXT:  ## BB#91: ## %cond.store89
 ; AVX512F-NEXT:    vpextrb $13, %xmm5, 45(%rdi)
@@ -9208,27 +8876,21 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:    vpslld $31, %zmm0, %zmm0
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB58_94
 ; AVX512F-NEXT:  ## BB#93: ## %cond.store91
 ; AVX512F-NEXT:    vpextrb $14, %xmm5, 46(%rdi)
 ; AVX512F-NEXT:  LBB58_94: ## %else92
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_96
 ; AVX512F-NEXT:  ## BB#95: ## %cond.store93
 ; AVX512F-NEXT:    vpextrb $15, %xmm5, 47(%rdi)
 ; AVX512F-NEXT:  LBB58_96: ## %else94
 ; AVX512F-NEXT:    kshiftlw $15, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_98
 ; AVX512F-NEXT:  ## BB#97: ## %cond.store95
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9236,9 +8898,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_98: ## %else96
 ; AVX512F-NEXT:    kshiftlw $14, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_100
 ; AVX512F-NEXT:  ## BB#99: ## %cond.store97
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9246,9 +8906,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_100: ## %else98
 ; AVX512F-NEXT:    kshiftlw $13, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_102
 ; AVX512F-NEXT:  ## BB#101: ## %cond.store99
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9256,9 +8914,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_102: ## %else100
 ; AVX512F-NEXT:    kshiftlw $12, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_104
 ; AVX512F-NEXT:  ## BB#103: ## %cond.store101
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9266,9 +8922,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_104: ## %else102
 ; AVX512F-NEXT:    kshiftlw $11, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_106
 ; AVX512F-NEXT:  ## BB#105: ## %cond.store103
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9276,9 +8930,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_106: ## %else104
 ; AVX512F-NEXT:    kshiftlw $10, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_108
 ; AVX512F-NEXT:  ## BB#107: ## %cond.store105
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9286,9 +8938,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_108: ## %else106
 ; AVX512F-NEXT:    kshiftlw $9, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_110
 ; AVX512F-NEXT:  ## BB#109: ## %cond.store107
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9296,9 +8946,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_110: ## %else108
 ; AVX512F-NEXT:    kshiftlw $8, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_112
 ; AVX512F-NEXT:  ## BB#111: ## %cond.store109
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9306,9 +8954,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_112: ## %else110
 ; AVX512F-NEXT:    kshiftlw $7, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_114
 ; AVX512F-NEXT:  ## BB#113: ## %cond.store111
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9316,9 +8962,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_114: ## %else112
 ; AVX512F-NEXT:    kshiftlw $6, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_116
 ; AVX512F-NEXT:  ## BB#115: ## %cond.store113
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9326,9 +8970,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_116: ## %else114
 ; AVX512F-NEXT:    kshiftlw $5, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_118
 ; AVX512F-NEXT:  ## BB#117: ## %cond.store115
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9336,9 +8978,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_118: ## %else116
 ; AVX512F-NEXT:    kshiftlw $4, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_120
 ; AVX512F-NEXT:  ## BB#119: ## %cond.store117
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9346,9 +8986,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_120: ## %else118
 ; AVX512F-NEXT:    kshiftlw $3, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_122
 ; AVX512F-NEXT:  ## BB#121: ## %cond.store119
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9356,9 +8994,7 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_122: ## %else120
 ; AVX512F-NEXT:    kshiftlw $2, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_124
 ; AVX512F-NEXT:  ## BB#123: ## %cond.store121
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9366,18 +9002,14 @@ define void @test_mask_store_64xi8(<64 x
 ; AVX512F-NEXT:  LBB58_124: ## %else122
 ; AVX512F-NEXT:    kshiftlw $1, %k1, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_126
 ; AVX512F-NEXT:  ## BB#125: ## %cond.store123
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
 ; AVX512F-NEXT:    vpextrb $14, %xmm0, 62(%rdi)
 ; AVX512F-NEXT:  LBB58_126: ## %else124
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB58_128
 ; AVX512F-NEXT:  ## BB#127: ## %cond.store125
 ; AVX512F-NEXT:    vextracti128 $1, %ymm5, %xmm0
@@ -9456,72 +9088,56 @@ define void @test_mask_store_8xi16(<8 x
 ; AVX512F-NEXT:    vptestmq %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.store
 ; AVX512F-NEXT:    vpextrw $0, %xmm1, (%rdi)
 ; AVX512F-NEXT:  LBB59_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.store1
 ; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
 ; AVX512F-NEXT:  LBB59_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.store3
 ; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
 ; AVX512F-NEXT:  LBB59_6: ## %else4
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.store5
 ; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
 ; AVX512F-NEXT:  LBB59_8: ## %else6
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.store7
 ; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
 ; AVX512F-NEXT:  LBB59_10: ## %else8
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.store9
 ; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
 ; AVX512F-NEXT:  LBB59_12: ## %else10
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB59_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.store11
 ; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
 ; AVX512F-NEXT:  LBB59_14: ## %else12
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k0
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB59_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.store13
 ; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
@@ -9765,81 +9381,63 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:    vptestmd %zmm0, %zmm0, %k0
 ; AVX512F-NEXT:    kshiftlw $15, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_2
 ; AVX512F-NEXT:  ## BB#1: ## %cond.store
 ; AVX512F-NEXT:    vpextrw $0, %xmm1, (%rdi)
 ; AVX512F-NEXT:  LBB60_2: ## %else
 ; AVX512F-NEXT:    kshiftlw $14, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_4
 ; AVX512F-NEXT:  ## BB#3: ## %cond.store1
 ; AVX512F-NEXT:    vpextrw $1, %xmm1, 2(%rdi)
 ; AVX512F-NEXT:  LBB60_4: ## %else2
 ; AVX512F-NEXT:    kshiftlw $13, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_6
 ; AVX512F-NEXT:  ## BB#5: ## %cond.store3
 ; AVX512F-NEXT:    vpextrw $2, %xmm1, 4(%rdi)
 ; AVX512F-NEXT:  LBB60_6: ## %else4
 ; AVX512F-NEXT:    kshiftlw $12, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_8
 ; AVX512F-NEXT:  ## BB#7: ## %cond.store5
 ; AVX512F-NEXT:    vpextrw $3, %xmm1, 6(%rdi)
 ; AVX512F-NEXT:  LBB60_8: ## %else6
 ; AVX512F-NEXT:    kshiftlw $11, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_10
 ; AVX512F-NEXT:  ## BB#9: ## %cond.store7
 ; AVX512F-NEXT:    vpextrw $4, %xmm1, 8(%rdi)
 ; AVX512F-NEXT:  LBB60_10: ## %else8
 ; AVX512F-NEXT:    kshiftlw $10, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_12
 ; AVX512F-NEXT:  ## BB#11: ## %cond.store9
 ; AVX512F-NEXT:    vpextrw $5, %xmm1, 10(%rdi)
 ; AVX512F-NEXT:  LBB60_12: ## %else10
 ; AVX512F-NEXT:    kshiftlw $9, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_14
 ; AVX512F-NEXT:  ## BB#13: ## %cond.store11
 ; AVX512F-NEXT:    vpextrw $6, %xmm1, 12(%rdi)
 ; AVX512F-NEXT:  LBB60_14: ## %else12
 ; AVX512F-NEXT:    kshiftlw $8, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_16
 ; AVX512F-NEXT:  ## BB#15: ## %cond.store13
 ; AVX512F-NEXT:    vpextrw $7, %xmm1, 14(%rdi)
 ; AVX512F-NEXT:  LBB60_16: ## %else14
 ; AVX512F-NEXT:    kshiftlw $7, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_18
 ; AVX512F-NEXT:  ## BB#17: ## %cond.store15
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9847,9 +9445,7 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_18: ## %else16
 ; AVX512F-NEXT:    kshiftlw $6, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_20
 ; AVX512F-NEXT:  ## BB#19: ## %cond.store17
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9857,9 +9453,7 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_20: ## %else18
 ; AVX512F-NEXT:    kshiftlw $5, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_22
 ; AVX512F-NEXT:  ## BB#21: ## %cond.store19
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9867,9 +9461,7 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_22: ## %else20
 ; AVX512F-NEXT:    kshiftlw $4, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_24
 ; AVX512F-NEXT:  ## BB#23: ## %cond.store21
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9877,9 +9469,7 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_24: ## %else22
 ; AVX512F-NEXT:    kshiftlw $3, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_26
 ; AVX512F-NEXT:  ## BB#25: ## %cond.store23
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9887,9 +9477,7 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_26: ## %else24
 ; AVX512F-NEXT:    kshiftlw $2, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_28
 ; AVX512F-NEXT:  ## BB#27: ## %cond.store25
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
@@ -9897,18 +9485,14 @@ define void @test_mask_store_16xi16(<16
 ; AVX512F-NEXT:  LBB60_28: ## %else26
 ; AVX512F-NEXT:    kshiftlw $1, %k0, %k1
 ; AVX512F-NEXT:    kshiftrw $15, %k1, %k1
-; AVX512F-NEXT:    kmovw %k1, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k1, %k1
 ; AVX512F-NEXT:    je LBB60_30
 ; AVX512F-NEXT:  ## BB#29: ## %cond.store27
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
 ; AVX512F-NEXT:    vpextrw $6, %xmm0, 28(%rdi)
 ; AVX512F-NEXT:  LBB60_30: ## %else28
 ; AVX512F-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512F-NEXT:    kmovw %k0, %eax
-; AVX512F-NEXT:    andl $1, %eax
-; AVX512F-NEXT:    testb %al, %al
+; AVX512F-NEXT:    kortestw %k0, %k0
 ; AVX512F-NEXT:    je LBB60_32
 ; AVX512F-NEXT:  ## BB#31: ## %cond.store29
 ; AVX512F-NEXT:    vextracti128 $1, %ymm1, %xmm0
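
As a closing note on the test churn above: the recurring change in these masked load/store hunks swaps the old single-bit check (kmovw of the mask into a GPR, andl $1, testb) for one kortestw of the mask register with itself, so the mask value never has to be moved into a general-purpose register before the conditional jump. The snippet below is a hypothetical reduction, not taken from this commit, of the kind of IR that branches on one bit of an AVX-512 compare mask; the function and block names are illustrative only.

; Hypothetical reduction (not part of this patch): branch on bit 0 of a
; 16-wide compare mask. On AVX-512 the compare result lives in a k-register,
; and the branch on the extracted i1 is the shape the updated checks cover.
define i32 @branch_on_mask_bit(<16 x i32> %a, <16 x i32> %b) {
entry:
  %cmp = icmp sgt <16 x i32> %a, %b
  %bit = extractelement <16 x i1> %cmp, i32 0
  br i1 %bit, label %taken, label %skipped
taken:
  ret i32 1
skipped:
  ret i32 0
}

Feeding something like this through llc with -mattr=+avx512f is one way to inspect which branch sequence actually gets selected.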



