[llvm] r323201 - [X86] Legalize v32i1 without BWI via splitting to v16i1 rather than the default of promoting to v32i8.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 06:25:39 PST 2018
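
The check-line changes below show the effect on the NoVLX (AVX512F without BWI/VLX) codegen: compare masks now stay in k-registers (vpmovsx* + vptestmd + kmovw, with the two 16-bit halves of a 32-bit mask combined via shll $16 / orl) instead of being spilled to an aligned stack slot and reloaded. As a rough sketch of the kind of IR whose lowering changes (hypothetical function name, not taken from this patch; assuming a target with AVX512F but no AVX512BW), a 32-wide byte compare whose v32i1 result is bitcast to i32 is now legalized by splitting the mask into two v16i1 halves rather than promoting it to v32i8:

    ; Sketch only: a v32i1 compare result packed into a scalar mask.
    ; Without AVX512BW, the v32i1 is now split into two v16i1 halves,
    ; each tested into a k-register, instead of being promoted to v32i8.
    define i32 @cmp32xi8_to_mask(<32 x i8> %a, <32 x i8> %b) {
    entry:
      %cmp  = icmp eq <32 x i8> %a, %b        ; yields <32 x i1>
      %mask = bitcast <32 x i1> %cmp to i32   ; pack the mask bits into an i32
      ret i32 %mask
    }
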
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Tue Jan 23 06:25:39 2018
@@ -11,22 +11,10 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -47,22 +35,10 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -85,25 +61,11 @@ define zeroext i32 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -127,25 +89,11 @@ define zeroext i32 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -170,27 +118,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -211,27 +143,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -254,30 +170,12 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -301,30 +199,12 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -350,30 +230,16 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -395,30 +261,16 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -442,41 +294,19 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -501,41 +331,19 @@ define zeroext i64 @test_masked_vpcmpeqb
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -675,49 +483,10 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -738,49 +507,10 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -803,50 +533,11 @@ define zeroext i32 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -870,50 +561,11 @@ define zeroext i32 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -938,54 +590,11 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1006,54 +615,11 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1076,55 +642,12 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1148,55 +671,12 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1222,22 +702,10 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1259,22 +727,10 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1298,25 +754,11 @@ define zeroext i32 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1341,25 +783,11 @@ define zeroext i32 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1385,27 +813,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1427,27 +839,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1471,30 +867,12 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1519,30 +897,12 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1568,30 +928,43 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -1599,109 +972,109 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -1709,7 +1082,11 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -1717,51 +1094,20 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1783,67 +1129,60 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
;
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -1851,7 +1190,7 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -1861,37 +1200,30 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -1915,51 +1247,23 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -1967,109 +1271,109 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -2077,17 +1381,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -2095,35 +1389,47 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2148,68 +1454,60 @@ define zeroext i64 @test_masked_vpcmpeqw
;
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -2217,7 +1515,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -2227,51 +1525,33 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2678,37 +1958,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2729,37 +1984,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2782,38 +2012,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2838,38 +2043,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2895,37 +2075,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -2949,38 +2104,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3007,42 +2137,13 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3063,42 +2164,13 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3121,43 +2193,14 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3182,43 +2225,14 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3244,42 +2258,13 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3303,43 +2288,14 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3561,49 +2517,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3625,49 +2544,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3691,50 +2573,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3759,50 +2604,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3828,49 +2636,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3895,50 +2666,13 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -3965,54 +2699,13 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4034,54 +2727,13 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4105,57 +2757,16 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
-; NoVLX-NEXT: vzeroupper
-; NoVLX-NEXT: retq
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
+; NoVLX-NEXT: vzeroupper
+; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
%1 = bitcast <4 x i64> %__b to <8 x i32>
@@ -4178,55 +2789,14 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4252,54 +2822,13 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4324,55 +2853,14 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4399,22 +2887,8 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4436,22 +2910,8 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4475,23 +2935,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4516,23 +2962,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4558,22 +2990,8 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4598,23 +3016,9 @@ define zeroext i32 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4641,27 +3045,9 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4683,27 +3069,9 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4727,28 +3095,10 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4773,28 +3123,10 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4820,27 +3152,9 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
;
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -4865,28 +3179,10 @@ define zeroext i64 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5485,31 +3781,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5530,31 +3807,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5577,32 +3835,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5627,32 +3866,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5678,31 +3898,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5726,32 +3927,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5778,36 +3960,13 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5828,36 +3987,13 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5880,37 +4016,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5935,37 +4048,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -5991,36 +4081,13 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6044,37 +4111,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6496,37 +4540,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6548,37 +4567,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6602,38 +4596,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6659,38 +4628,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6717,37 +4661,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6772,38 +4691,13 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6831,42 +4725,13 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6888,42 +4753,13 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -6947,43 +4783,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7009,43 +4816,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7072,42 +4850,13 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7132,43 +4881,14 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7366,47 +5086,8 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7428,47 +5109,8 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7492,48 +5134,9 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7558,48 +5161,9 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7625,47 +5189,8 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7690,48 +5215,9 @@ define zeroext i32 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7758,52 +5244,9 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7825,52 +5268,9 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7894,53 +5294,10 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -7965,53 +5322,10 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8037,52 +5351,9 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
;
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8107,53 +5378,10 @@ define zeroext i64 @test_masked_vpcmpeqq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8179,22 +5407,10 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8215,22 +5431,10 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8253,25 +5457,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8295,25 +5485,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8338,27 +5514,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8379,27 +5539,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8422,30 +5566,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8469,30 +5595,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8518,30 +5626,16 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8563,30 +5657,16 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8610,41 +5690,19 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8669,41 +5727,19 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8843,49 +5879,10 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8906,49 +5903,10 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -8971,50 +5929,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9038,50 +5957,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9106,54 +5986,11 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9174,54 +6011,11 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9244,55 +6038,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9316,55 +6067,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9390,22 +6098,10 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9427,22 +6123,10 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9466,25 +6150,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9509,25 +6179,11 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9553,27 +6209,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9595,27 +6235,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9639,30 +6263,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9687,30 +6293,12 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9736,30 +6324,63 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -9769,17 +6390,17 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
@@ -9787,69 +6408,69 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -9858,26 +6479,10 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -9885,51 +6490,20 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -9951,67 +6525,60 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
;
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -10019,7 +6586,7 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -10029,37 +6596,30 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10083,51 +6643,23 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -10135,109 +6667,109 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -10245,17 +6777,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -10263,35 +6785,47 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10316,68 +6850,60 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -10385,7 +6911,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -10395,51 +6921,33 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10846,37 +7354,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10897,37 +7380,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -10950,38 +7408,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11006,38 +7439,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11063,37 +7471,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11117,38 +7500,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11175,42 +7533,13 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11231,42 +7560,13 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11289,43 +7589,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11350,43 +7621,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11412,42 +7654,13 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11471,43 +7684,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11729,49 +7913,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11793,49 +7940,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11859,50 +7969,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11927,50 +8000,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -11996,49 +8032,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12063,50 +8062,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12133,54 +8095,13 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12202,54 +8123,13 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12273,55 +8153,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12346,55 +8185,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12420,54 +8218,13 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12492,55 +8249,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12567,22 +8283,8 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12604,22 +8306,8 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12643,23 +8331,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12684,23 +8358,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12726,22 +8386,8 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12766,23 +8412,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12809,27 +8441,9 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12851,27 +8465,9 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12895,28 +8491,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12941,28 +8519,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -12988,27 +8548,9 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
;
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13033,28 +8575,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13653,31 +9177,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13698,31 +9203,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13745,32 +9231,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13795,32 +9262,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13846,31 +9294,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13894,32 +9323,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13946,36 +9356,13 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -13996,36 +9383,13 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14048,37 +9412,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14103,37 +9444,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14159,36 +9477,13 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14212,37 +9507,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14664,37 +9936,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14716,37 +9963,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14770,38 +9992,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14827,38 +10024,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14885,37 +10057,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14940,38 +10087,13 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -14999,42 +10121,13 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15056,42 +10149,13 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15115,43 +10179,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15177,43 +10212,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15240,42 +10246,13 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15300,43 +10277,14 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15534,47 +10482,8 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15596,47 +10505,8 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15660,48 +10530,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15726,48 +10557,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15793,47 +10585,8 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15858,48 +10611,9 @@ define zeroext i32 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15926,52 +10640,9 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -15993,52 +10664,9 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16062,53 +10690,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16133,53 +10718,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16205,52 +10747,9 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16275,53 +10774,10 @@ define zeroext i64 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16347,25 +10803,13 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16386,26 +10830,14 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16428,28 +10860,14 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16473,13 +10891,6 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -16487,15 +10898,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16520,30 +10924,14 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16564,31 +10952,15 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16611,33 +10983,15 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16661,13 +11015,6 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -16675,20 +11022,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16714,34 +11050,20 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16763,35 +11085,21 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
;
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16815,43 +11123,23 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -16876,44 +11164,24 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
-; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17067,52 +11335,13 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17133,13 +11362,6 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -17147,39 +11369,7 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17202,13 +11392,6 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -17216,39 +11399,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17272,13 +11423,6 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -17287,39 +11431,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17344,57 +11456,14 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17415,13 +11484,6 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -17429,44 +11491,8 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17489,13 +11515,6 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -17503,44 +11522,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17564,13 +11547,6 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -17579,44 +11555,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17642,25 +11582,13 @@ define zeroext i32 @test_vpcmpsgew_v16i1
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17682,26 +11610,14 @@ define zeroext i32 @test_vpcmpsgew_v16i1
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17725,28 +11641,14 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17771,13 +11673,6 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -17785,15 +11680,8 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17819,30 +11707,14 @@ define zeroext i64 @test_vpcmpsgew_v16i1
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17864,31 +11736,15 @@ define zeroext i64 @test_vpcmpsgew_v16i1
;
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17912,33 +11768,15 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -17963,13 +11801,6 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -17977,20 +11808,9 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -18016,205 +11836,191 @@ define zeroext i64 @test_vpcmpsgew_v32i1
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -18236,67 +12042,60 @@ define zeroext i64 @test_vpcmpsgew_v32i1
;
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -18304,7 +12103,7 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -18314,44 +12113,37 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -18375,51 +12167,23 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -18427,109 +12191,109 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -18537,17 +12301,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -18555,38 +12309,52 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
-; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -18611,68 +12379,60 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -18680,7 +12440,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -18690,56 +12450,40 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
-; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
-; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpcmpeqd %ymm2, %ymm2, %ymm2
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm2, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19146,37 +12890,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19197,37 +12916,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19250,38 +12944,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19306,38 +12975,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19363,37 +13007,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19417,38 +13036,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19475,42 +13069,13 @@ define zeroext i64 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19531,42 +13096,13 @@ define zeroext i64 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19589,43 +13125,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19650,43 +13157,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19712,42 +13190,13 @@ define zeroext i64 @test_vpcmpsged_v4i1_
;
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -19771,43 +13220,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20029,49 +13449,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20093,49 +13476,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20159,50 +13505,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20227,50 +13536,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20296,49 +13568,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20363,50 +13598,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20433,54 +13631,13 @@ define zeroext i64 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20502,54 +13659,13 @@ define zeroext i64 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20573,55 +13689,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20646,55 +13721,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20720,54 +13754,13 @@ define zeroext i64 @test_vpcmpsged_v8i1_
;
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20792,55 +13785,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20867,22 +13819,8 @@ define zeroext i32 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20904,22 +13842,8 @@ define zeroext i32 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20943,23 +13867,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -20984,23 +13894,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21026,22 +13922,8 @@ define zeroext i32 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21066,23 +13948,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21109,27 +13977,9 @@ define zeroext i64 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21151,27 +14001,9 @@ define zeroext i64 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21195,28 +14027,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21241,28 +14055,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21288,27 +14084,9 @@ define zeroext i64 @test_vpcmpsged_v16i1
;
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21333,28 +14111,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21953,31 +14713,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -21998,31 +14739,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22045,32 +14767,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22095,32 +14798,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22146,31 +14830,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22194,32 +14859,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22246,36 +14892,13 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22296,36 +14919,13 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22348,37 +14948,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22403,37 +14980,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22459,36 +15013,13 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22512,37 +15043,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -22964,37 +15472,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23016,37 +15499,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23070,38 +15528,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23127,38 +15560,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23185,37 +15593,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23240,38 +15623,13 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23299,42 +15657,13 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23356,42 +15685,13 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23415,43 +15715,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23477,43 +15748,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23540,42 +15782,13 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23600,43 +15813,14 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23834,47 +16018,8 @@ define zeroext i32 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23896,47 +16041,8 @@ define zeroext i32 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -23960,48 +16066,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24026,48 +16093,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24093,47 +16121,8 @@ define zeroext i32 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24158,48 +16147,9 @@ define zeroext i32 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24226,52 +16176,9 @@ define zeroext i64 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24293,52 +16200,9 @@ define zeroext i64 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24362,53 +16226,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24433,53 +16254,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24505,52 +16283,9 @@ define zeroext i64 @test_vpcmpsgeq_v8i1_
;
; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24575,53 +16310,10 @@ define zeroext i64 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24647,25 +16339,13 @@ define zeroext i32 @test_vpcmpultb_v16i1
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24686,25 +16366,13 @@ define zeroext i32 @test_vpcmpultb_v16i1
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24727,28 +16395,14 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24772,28 +16426,14 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24818,30 +16458,14 @@ define zeroext i64 @test_vpcmpultb_v16i1
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24862,30 +16486,14 @@ define zeroext i64 @test_vpcmpultb_v16i1
;
; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24908,33 +16516,15 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -24958,33 +16548,15 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25010,33 +16582,19 @@ define zeroext i64 @test_vpcmpultb_v32i1
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25058,33 +16616,19 @@ define zeroext i64 @test_vpcmpultb_v32i1
;
; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25108,44 +16652,22 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25170,44 +16692,22 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm2, %xmm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; NoVLX-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm3, %ymm0
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vpand %xmm2, %xmm3, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25359,52 +16859,13 @@ define zeroext i32 @test_vpcmpultw_v8i1_
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25425,52 +16886,13 @@ define zeroext i32 @test_vpcmpultw_v8i1_
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25493,13 +16915,6 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -25507,39 +16922,7 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25563,13 +16946,6 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
@@ -25577,39 +16953,7 @@ define zeroext i32 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25634,57 +16978,14 @@ define zeroext i64 @test_vpcmpultw_v8i1_
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25705,57 +17006,14 @@ define zeroext i64 @test_vpcmpultw_v8i1_
;
; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1
; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25778,13 +17036,6 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1
@@ -25792,44 +17043,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25853,13 +17068,6 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1
@@ -25867,44 +17075,8 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25930,25 +17102,13 @@ define zeroext i32 @test_vpcmpultw_v16i1
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -25970,25 +17130,13 @@ define zeroext i32 @test_vpcmpultw_v16i1
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26012,28 +17160,14 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26058,28 +17192,14 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26105,30 +17225,14 @@ define zeroext i64 @test_vpcmpultw_v16i1
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26150,30 +17254,14 @@ define zeroext i64 @test_vpcmpultw_v16i1
;
; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26197,33 +17285,15 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26248,33 +17318,15 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26300,30 +17352,43 @@ define zeroext i64 @test_vpcmpultw_v32i1
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -26331,109 +17396,109 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -26441,7 +17506,10 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -26449,56 +17517,26 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm8, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm0
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26520,67 +17558,60 @@ define zeroext i64 @test_vpcmpultw_v32i1
;
; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm1
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -26588,7 +17619,7 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -26598,42 +17629,35 @@ define zeroext i64 @test_vpcmpultw_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm1
+; NoVLX-NEXT: vpxor (%rdi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm0
+; NoVLX-NEXT: vpxor 32(%rdi), %ymm2, %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26657,51 +17681,23 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -26709,109 +17705,109 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -26819,16 +17815,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm0
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
-; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm6, %xmm6
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -26836,41 +17823,52 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
+; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
-; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm2
+; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
-; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm6, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -26895,28 +17893,20 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $96, %rsp
-; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vmovq %xmm1, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
@@ -26924,39 +17914,39 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -26964,7 +17954,7 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -26974,56 +17964,38 @@ define zeroext i64 @test_masked_vpcmpult
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; NoVLX-NEXT: vpternlogd $255, %zmm4, %zmm4, %zmm4 {%k1} {z}
-; NoVLX-NEXT: vpmovdb %zmm4, %xmm4
-; NoVLX-NEXT: vpternlogd $255, %zmm5, %zmm5, %zmm5 {%k2} {z}
-; NoVLX-NEXT: vpmovdb %zmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: shrl $16, %edi
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm1
; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768]
-; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm3, %ymm0
-; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm4, %xmm0, %xmm0
; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm1
+; NoVLX-NEXT: vpxor 32(%rsi), %ymm2, %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
-; NoVLX-NEXT: vpand %xmm5, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax
-; NoVLX-NEXT: shlq $32, %rax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0
+; NoVLX-NEXT: vpxor (%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
+; NoVLX-NEXT: kmovw %edi, %k2
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %ecx
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: shll $16, %eax
+; NoVLX-NEXT: orl %ecx, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27430,37 +18402,12 @@ define zeroext i32 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27481,37 +18428,12 @@ define zeroext i32 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27534,38 +18456,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27590,38 +18487,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27647,37 +18519,12 @@ define zeroext i32 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27701,38 +18548,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27759,42 +18581,13 @@ define zeroext i64 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27815,42 +18608,13 @@ define zeroext i64 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27873,43 +18637,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27934,43 +18669,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -27996,42 +18702,13 @@ define zeroext i64 @test_vpcmpultd_v4i1_
;
; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28055,43 +18732,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28313,49 +18961,12 @@ define zeroext i32 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28377,49 +18988,12 @@ define zeroext i32 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28443,50 +19017,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28511,50 +19048,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28580,49 +19080,12 @@ define zeroext i32 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28647,50 +19110,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28717,54 +19143,13 @@ define zeroext i64 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28786,54 +19171,13 @@ define zeroext i64 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28857,55 +19201,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -28930,55 +19233,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29004,54 +19266,13 @@ define zeroext i64 @test_vpcmpultd_v8i1_
;
; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29076,55 +19297,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29151,22 +19331,8 @@ define zeroext i32 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29188,22 +19354,8 @@ define zeroext i32 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29227,23 +19379,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29268,23 +19406,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29310,22 +19434,8 @@ define zeroext i32 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29350,23 +19460,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29393,27 +19489,9 @@ define zeroext i64 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29435,27 +19513,9 @@ define zeroext i64 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29479,28 +19539,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29525,28 +19567,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29572,27 +19596,9 @@ define zeroext i64 @test_vpcmpultd_v16i1
;
; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -29617,28 +19623,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30237,31 +20225,12 @@ define zeroext i32 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30282,31 +20251,12 @@ define zeroext i32 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30329,32 +20279,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30379,32 +20310,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30430,31 +20342,12 @@ define zeroext i32 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30478,32 +20371,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30530,36 +20404,13 @@ define zeroext i64 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30580,36 +20431,13 @@ define zeroext i64 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30632,37 +20460,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30687,37 +20492,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30743,36 +20525,13 @@ define zeroext i64 @test_vpcmpultq_v2i1_
;
; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -30796,37 +20555,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31248,37 +20984,12 @@ define zeroext i32 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31300,37 +21011,12 @@ define zeroext i32 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31354,38 +21040,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31411,38 +21072,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31469,37 +21105,12 @@ define zeroext i32 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31524,38 +21135,13 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31583,42 +21169,13 @@ define zeroext i64 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31640,42 +21197,13 @@ define zeroext i64 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31699,43 +21227,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31761,43 +21260,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31824,42 +21294,13 @@ define zeroext i64 @test_vpcmpultq_v4i1_
;
; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -31884,43 +21325,14 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32118,47 +21530,8 @@ define zeroext i32 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32180,47 +21553,8 @@ define zeroext i32 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32244,48 +21578,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32310,48 +21605,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32377,47 +21633,8 @@ define zeroext i32 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32442,48 +21659,9 @@ define zeroext i32 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32510,52 +21688,9 @@ define zeroext i64 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32577,52 +21712,9 @@ define zeroext i64 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32646,53 +21738,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32717,53 +21766,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32789,52 +21795,9 @@ define zeroext i64 @test_vpcmpultq_v8i1_
;
; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -32859,53 +21822,10 @@ define zeroext i64 @test_masked_vpcmpult
;
; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33308,37 +22228,12 @@ define zeroext i32 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33359,37 +22254,12 @@ define zeroext i32 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33411,37 +22281,12 @@ define zeroext i32 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33465,38 +22310,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33520,38 +22340,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovaps (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33576,38 +22371,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33634,42 +22404,13 @@ define zeroext i64 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33690,42 +22431,13 @@ define zeroext i64 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33747,42 +22459,13 @@ define zeroext i64 @test_vcmpoeqps_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33806,43 +22489,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33866,43 +22520,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovaps (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -33927,43 +22552,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34185,49 +22781,12 @@ define zeroext i32 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34249,49 +22808,12 @@ define zeroext i32 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34314,49 +22836,12 @@ define zeroext i32 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34381,50 +22866,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34449,50 +22897,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34518,50 +22929,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34589,54 +22963,13 @@ define zeroext i64 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34658,54 +22991,13 @@ define zeroext i64 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34728,54 +23020,13 @@ define zeroext i64 @test_vcmpoeqps_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34800,55 +23051,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34873,55 +23083,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovaps (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -34947,55 +23116,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorps %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $8, %k0, %k0
+; NoVLX-NEXT: kshiftrw $8, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35023,22 +23151,8 @@ define zeroext i32 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35060,22 +23174,8 @@ define zeroext i32 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35098,22 +23198,8 @@ define zeroext i32 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35138,23 +23224,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35179,23 +23251,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35221,23 +23279,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35306,27 +23350,9 @@ define zeroext i64 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35348,27 +23374,9 @@ define zeroext i64 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35391,27 +23399,9 @@ define zeroext i64 @test_vcmpoeqps_v16i1
;
; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35436,28 +23426,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35482,28 +23454,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -35529,28 +23483,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqps_v16i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k1 {%k1}
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %edi, %k1
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36194,31 +24130,12 @@ define zeroext i32 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36239,31 +24156,12 @@ define zeroext i32 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36285,31 +24183,12 @@ define zeroext i32 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36333,32 +24212,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36382,32 +24242,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36432,32 +24273,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36484,36 +24306,13 @@ define zeroext i64 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36534,36 +24333,13 @@ define zeroext i64 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovapd (%rdi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36585,36 +24361,13 @@ define zeroext i64 @test_vcmpoeqpd_v2i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36638,37 +24391,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm1 killed %xmm1 def %zmm1
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36692,37 +24422,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %xmm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -36747,37 +24454,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %xmm0 killed %xmm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $14, %k0, %k0
+; NoVLX-NEXT: kshiftrw $14, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37193,37 +24877,12 @@ define zeroext i32 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37245,37 +24904,12 @@ define zeroext i32 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovapd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37298,37 +24932,12 @@ define zeroext i32 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37353,38 +24962,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37409,38 +24993,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37466,38 +25025,13 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37525,42 +25059,13 @@ define zeroext i64 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37582,42 +25087,13 @@ define zeroext i64 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vmovapd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37640,42 +25116,13 @@ define zeroext i64 @test_vcmpoeqpd_v4i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37700,43 +25147,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm1 killed %ymm1 def %zmm1
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37761,43 +25179,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vmovapd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -37823,43 +25212,14 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: def %ymm0 killed %ymm0 def %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edx
-; NoVLX-NEXT: kmovw %k0, %esi
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kshiftlw $12, %k0, %k0
+; NoVLX-NEXT: kshiftrw $12, %k0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38113,47 +25473,8 @@ define zeroext i32 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38175,47 +25496,8 @@ define zeroext i32 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38238,47 +25520,8 @@ define zeroext i32 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38303,48 +25546,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38369,48 +25573,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38436,48 +25601,9 @@ define zeroext i32 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38555,52 +25681,9 @@ define zeroext i64 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38622,52 +25705,9 @@ define zeroext i64 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38690,52 +25730,9 @@ define zeroext i64 @test_vcmpoeqpd_v8i1_
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38760,53 +25757,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38831,53 +25785,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi), %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -38903,53 +25814,10 @@ define zeroext i64 @test_masked_vcmpoeqp
;
; NoVLX-LABEL: test_masked_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
-; NoVLX-NEXT: kshiftrw $7, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r8d
-; NoVLX-NEXT: kshiftrw $6, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r9d
-; NoVLX-NEXT: kshiftrw $5, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %r10d
-; NoVLX-NEXT: kshiftrw $4, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %esi
-; NoVLX-NEXT: kshiftrw $3, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %edi
-; NoVLX-NEXT: kshiftrw $2, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: kshiftrw $1, %k0, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw %k0, %edx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vxorpd %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $3, %edi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $4, %esi, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $5, %r10d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $6, %r9d, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
-; NoVLX-NEXT: shlq $32, %rcx
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: orq %rcx, %rax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: movzwl %ax, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
@@ -39028,22 +25896,8 @@ define i32 @test_cmpm_rnd_zero(<16 x flo
;
; NoVLX-LABEL: test_cmpm_rnd_zero:
; NoVLX: # %bb.0:
-; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .cfi_offset %rbp, -16
-; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .cfi_def_cfa_register %rbp
-; NoVLX-NEXT: andq $-32, %rsp
-; NoVLX-NEXT: subq $32, %rsp
-; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k1
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
-; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
-; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
%res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
Modified: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-256.ll Tue Jan 23 06:25:39 2018
@@ -411,28 +411,22 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8
;
; AVX512F-LABEL: v32i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm1
-; AVX512F-NEXT: vpand %ymm1, %ymm0, %ymm0
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %eax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %ecx, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-and-setcc-512.ll Tue Jan 23 06:25:39 2018
@@ -280,38 +280,22 @@ define i32 @v32i16(<32 x i16> %a, <32 x
;
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
+; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k1
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm0
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtw %ymm6, %ymm4, %ymm2
-; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
-; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX512F-NEXT: vpcmpgtw %ymm7, %ymm5, %ymm2
-; AVX512F-NEXT: vpmovsxwd %ymm2, %zmm2
-; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
-; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vpslld $31, %zmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %eax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k2}
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %ecx, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1332,43 +1316,40 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8
;
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
+; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm3
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k1
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm2
-; AVX512F-NEXT: vpand %ymm2, %ymm1, %ymm1
-; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
-; AVX512F-NEXT: vpand %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512F-NEXT: vpslld $31, %zmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k3
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k4
+; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm5, %ymm0
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm0
-; AVX512F-NEXT: vpslld $31, %zmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movl (%rsp), %ecx
-; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm4, %ymm2
+; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
+; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
+; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
+; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0 {%k4}
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0 {%k3}
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: shll $16, %ecx
+; AVX512F-NEXT: orl %eax, %ecx
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k2}
+; AVX512F-NEXT: kmovw %k0, %edx
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 {%k1}
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %edx, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll Tue Jan 23 06:25:39 2018
@@ -505,24 +505,15 @@ define <32 x i8> @ext_i32_32i8(i32 %a0)
;
; AVX512F-LABEL: ext_i32_32i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: movl %edi, (%rsp)
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: shrl $16, %edi
+; AVX512F-NEXT: kmovw %edi, %k2
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
-; AVX512F-NEXT: kmovw (%rsp), %k1
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i8:
@@ -792,27 +783,14 @@ define <32 x i16> @ext_i32_32i16(i32 %a0
;
; AVX512F-LABEL: ext_i32_32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: movl %edi, (%rsp)
-; AVX512F-NEXT: kmovw (%rsp), %k1
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm2 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: shrl $16, %edi
+; AVX512F-NEXT: kmovw %edi, %k2
+; AVX512F-NEXT: movl {{.*}}(%rip), %eax
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
+; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vpmovdw %zmm1, %ymm1
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i32_32i16:
@@ -950,33 +928,27 @@ define <64 x i8> @ext_i64_64i8(i64 %a0)
;
; AVX512F-LABEL: ext_i64_64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $64, %rsp
-; AVX512F-NEXT: movl %edi, (%rsp)
-; AVX512F-NEXT: shrq $32, %rdi
-; AVX512F-NEXT: movl %edi, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: movq %rdi, %rax
+; AVX512F-NEXT: movq %rdi, %rcx
+; AVX512F-NEXT: kmovw %edi, %k1
+; AVX512F-NEXT: movl %edi, %edx
+; AVX512F-NEXT: shrl $16, %edx
+; AVX512F-NEXT: shrq $32, %rax
+; AVX512F-NEXT: shrq $48, %rcx
+; AVX512F-NEXT: kmovw %ecx, %k2
+; AVX512F-NEXT: kmovw %eax, %k3
+; AVX512F-NEXT: kmovw %edx, %k4
; AVX512F-NEXT: movl {{.*}}(%rip), %eax
-; AVX512F-NEXT: kmovw (%rsp), %k1
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; AVX512F-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k2} {z}
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k4} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; AVX512F-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
-; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k1} {z}
+; AVX512F-NEXT: vpbroadcastd %eax, %zmm1 {%k3} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vpbroadcastd %eax, %zmm2 {%k2} {z}
; AVX512F-NEXT: vpmovdb %zmm2, %xmm2
; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: retq
;
; AVX512VLBW-LABEL: ext_i64_64i8:
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-256.ll Tue Jan 23 06:25:39 2018
@@ -183,24 +183,16 @@ define i32 @v32i8(<32 x i8> %a, <32 x i8
;
; AVX512F-LABEL: v32i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
; AVX512F-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %eax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %ecx, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll (original)
+++ llvm/trunk/test/CodeGen/X86/bitcast-setcc-512.ll Tue Jan 23 06:25:39 2018
@@ -51,24 +51,16 @@ define i32 @v32i16(<32 x i16> %a, <32 x
;
; AVX512F-LABEL: v32i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $32, %rsp
-; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm1
-; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; AVX512F-NEXT: vpcmpgtw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
-; AVX512F-NEXT: movl (%rsp), %eax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm0
+; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %ecx, %eax
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
@@ -1006,35 +998,28 @@ define i64 @v64i8(<64 x i8> %a, <64 x i8
;
; AVX512F-LABEL: v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: pushq %rbp
-; AVX512F-NEXT: .cfi_def_cfa_offset 16
-; AVX512F-NEXT: .cfi_offset %rbp, -16
-; AVX512F-NEXT: movq %rsp, %rbp
-; AVX512F-NEXT: .cfi_def_cfa_register %rbp
-; AVX512F-NEXT: andq $-32, %rsp
-; AVX512F-NEXT: subq $64, %rsp
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
-; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm2
; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, (%rsp)
+; AVX512F-NEXT: kmovw %k0, %ecx
+; AVX512F-NEXT: shll $16, %ecx
+; AVX512F-NEXT: orl %eax, %ecx
; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; AVX512F-NEXT: kmovw %k0, %edx
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
-; AVX512F-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; AVX512F-NEXT: movl (%rsp), %ecx
-; AVX512F-NEXT: movl {{[0-9]+}}(%rsp), %eax
+; AVX512F-NEXT: kmovw %k0, %eax
+; AVX512F-NEXT: shll $16, %eax
+; AVX512F-NEXT: orl %edx, %eax
; AVX512F-NEXT: shlq $32, %rax
; AVX512F-NEXT: orq %rcx, %rax
-; AVX512F-NEXT: movq %rbp, %rsp
-; AVX512F-NEXT: popq %rbp
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
Modified: llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll (original)
+++ llvm/trunk/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll Tue Jan 23 06:25:39 2018
@@ -132,22 +132,70 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7
; AVX256VL: # %bb.0:
; AVX256VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX256VL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX256VL-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX256VL-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX256VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX256VL-NEXT: vpmovsxbw %xmm1, %ymm1
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k1
+; AVX256VL-NEXT: vpmovsxbw %xmm0, %ymm0
+; AVX256VL-NEXT: vextracti128 $1, %ymm0, %xmm1
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k2
+; AVX256VL-NEXT: vpmovsxwd %xmm0, %ymm0
+; AVX256VL-NEXT: vptestmd %ymm0, %ymm0, %k3
+; AVX256VL-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k3} {z}
+; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k2} {z}
+; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
+; AVX256VL-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm1[2,3,0,1]
+; AVX256VL-NEXT: vpblendd {{.*#+}} ymm1 = ymm1[0,1],ymm2[2],ymm1[3],ymm2[4,5],ymm1[6],ymm2[7]
+; AVX256VL-NEXT: vpshufb {{.*#+}} ymm1 = ymm1[6,7,12,13,u,u,8,9,6,7,14,15,14,15,0,1,22,23,28,29,18,19,26,27,22,23,u,u,30,31,16,17]
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm2 {%k1} {z}
+; AVX256VL-NEXT: vpmovdw %ymm2, %xmm2
+; AVX256VL-NEXT: kshiftrw $8, %k1, %k1
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm3 {%k1} {z}
+; AVX256VL-NEXT: vpmovdw %ymm3, %xmm3
+; AVX256VL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
+; AVX256VL-NEXT: vpermq {{.*#+}} ymm2 = ymm2[1,1,2,1]
+; AVX256VL-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,0,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,0,0,255,255,255,255]
+; AVX256VL-NEXT: vpblendvb %ymm3, %ymm1, %ymm2, %ymm1
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm2
+; AVX256VL-NEXT: vpslld $31, %ymm2, %ymm2
+; AVX256VL-NEXT: vptestmd %ymm2, %ymm2, %k1
+; AVX256VL-NEXT: vextracti128 $1, %ymm1, %xmm1
+; AVX256VL-NEXT: vpmovsxwd %xmm1, %ymm1
+; AVX256VL-NEXT: vpslld $31, %ymm1, %ymm1
+; AVX256VL-NEXT: vptestmd %ymm1, %ymm1, %k0
+; AVX256VL-NEXT: kunpckbw %k1, %k0, %k0
+; AVX256VL-NEXT: kshiftrw $8, %k0, %k2
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm1 {%k2} {z}
+; AVX256VL-NEXT: vpmovdw %ymm1, %xmm1
+; AVX256VL-NEXT: vpacksswb %xmm0, %xmm1, %xmm1
+; AVX256VL-NEXT: vmovdqa32 %ymm0, %ymm0 {%k1} {z}
+; AVX256VL-NEXT: vpmovdw %ymm0, %xmm0
+; AVX256VL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
+; AVX256VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; AVX256VL-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX256VL-NEXT: retq
;
; AVX512NOBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
; AVX512NOBW: # %bb.0:
; AVX512NOBW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512NOBW-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
-; AVX512NOBW-NEXT: vpshufb {{.*#+}} ymm1 = ymm0[3,6,u,12,3,7,7,0,3,6,1,13,3,u,7,0,u,u,22,u,u,u,u,u,u,u,u,u,u,21,u,u]
-; AVX512NOBW-NEXT: vpermq {{.*#+}} ymm0 = ymm0[2,3,0,1]
-; AVX512NOBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[u,u,6,u,u,u,u,u,u,u,u,u,u,5,u,u,19,22,u,28,19,23,23,16,19,22,17,29,19,u,23,16]
-; AVX512NOBW-NEXT: vmovdqa {{.*#+}} ymm2 = [255,255,0,255,255,255,255,255,255,255,255,255,255,0,255,255,0,0,255,0,0,0,0,0,0,0,0,0,0,255,0,0]
-; AVX512NOBW-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0
+; AVX512NOBW-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512NOBW-NEXT: vptestmd %zmm1, %zmm1, %k1
+; AVX512NOBW-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512NOBW-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512NOBW-NEXT: vptestmd %zmm0, %zmm0, %k2
+; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512NOBW-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; AVX512NOBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,6,22,12,3,7,7,0,3,6,1,13,3,21,7,0]
+; AVX512NOBW-NEXT: vpermi2d %zmm0, %zmm1, %zmm2
+; AVX512NOBW-NEXT: vptestmd %zmm2, %zmm2, %k1
+; AVX512NOBW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; AVX512NOBW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512NOBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX512NOBW-NEXT: retq
;
; AVX256VLBW-LABEL: shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0:
Modified: llvm/trunk/test/CodeGen/X86/vector-compare-results.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-compare-results.ll?rev=323201&r1=323200&r2=323201&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-compare-results.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-compare-results.ll Tue Jan 23 06:25:39 2018
@@ -2257,23 +2257,25 @@ define <64 x i1> @test_cmp_v64i8(<64 x i
;
; AVX512F-LABEL: test_cmp_v64i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4
; AVX512F-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vextracti128 $1, %ymm4, %xmm3
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm4
+; AVX512F-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2
+; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512F-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512F-NEXT: vmovdqa %xmm4, %xmm2
+; AVX512F-NEXT: vmovdqa %xmm4, %xmm1
+; AVX512F-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v64i8:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm4
; AVX512DQ-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512DQ-NEXT: vextracti128 $1, %ymm4, %xmm3
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm4
+; AVX512DQ-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm2
+; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
; AVX512DQ-NEXT: # kill: def %xmm0 killed %xmm0 killed %ymm0
-; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm2
+; AVX512DQ-NEXT: vmovdqa %xmm4, %xmm1
+; AVX512DQ-NEXT: # kill: def %xmm2 killed %xmm2 killed %ymm2
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
@@ -2711,32 +2713,24 @@ define <32 x i1> @test_cmp_v32f32(<32 x
;
; AVX512F-LABEL: test_cmp_v32f32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vcmpltps %zmm1, %zmm3, %k1
+; AVX512F-NEXT: vcmpltps %zmm0, %zmm2, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v32f32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k0
-; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vcmpltps %zmm1, %zmm3, %k0
+; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm2, %k1
+; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_cmp_v32f32:
@@ -3262,32 +3256,24 @@ define <32 x i1> @test_cmp_v32i32(<32 x
;
; AVX512F-LABEL: test_cmp_v32i32:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpgtd %zmm3, %zmm1, %k1
+; AVX512F-NEXT: vpcmpgtd %zmm2, %zmm0, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v32i32:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
-; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpcmpgtd %zmm3, %zmm1, %k0
+; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
+; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_cmp_v32i32:
@@ -6386,36 +6372,36 @@ define <128 x i1> @test_cmp_v128i8(<128
; AVX512F-LABEL: test_cmp_v128i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0
-; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1
-; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2
-; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3
-; AVX512F-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX512F-NEXT: vpmovsxbd %xmm4, %zmm4
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm4
; AVX512F-NEXT: vptestmd %zmm4, %zmm4, %k0
-; AVX512F-NEXT: kmovw %k0, 14(%rdi)
-; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
-; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512F-NEXT: kmovw %k0, 12(%rdi)
-; AVX512F-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512F-NEXT: vpmovsxbd %xmm3, %zmm3
-; AVX512F-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512F-NEXT: kmovw %k0, 10(%rdi)
-; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, 8(%rdi)
-; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512F-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512F-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512F-NEXT: kmovw %k0, 6(%rdi)
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, 4(%rdi)
-; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512F-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: kmovw %k0, 2(%rdi)
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512F-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k2
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k3
+; AVX512F-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k4
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k5
+; AVX512F-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k6
+; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512F-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k7
+; AVX512F-NEXT: kmovw %k7, 14(%rdi)
+; AVX512F-NEXT: kmovw %k6, 12(%rdi)
+; AVX512F-NEXT: kmovw %k5, 10(%rdi)
+; AVX512F-NEXT: kmovw %k4, 8(%rdi)
+; AVX512F-NEXT: kmovw %k3, 6(%rdi)
+; AVX512F-NEXT: kmovw %k2, 4(%rdi)
+; AVX512F-NEXT: kmovw %k1, 2(%rdi)
; AVX512F-NEXT: kmovw %k0, (%rdi)
; AVX512F-NEXT: movq %rdi, %rax
; AVX512F-NEXT: vzeroupper
@@ -6424,36 +6410,36 @@ define <128 x i1> @test_cmp_v128i8(<128
; AVX512DQ-LABEL: test_cmp_v128i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm1
-; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm2
-; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm3
-; AVX512DQ-NEXT: vextracti128 $1, %ymm3, %xmm4
-; AVX512DQ-NEXT: vpmovsxbd %xmm4, %zmm4
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm4
; AVX512DQ-NEXT: vptestmd %zmm4, %zmm4, %k0
-; AVX512DQ-NEXT: kmovw %k0, 14(%rdi)
-; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
-; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kmovw %k0, 12(%rdi)
-; AVX512DQ-NEXT: vextracti128 $1, %ymm2, %xmm3
-; AVX512DQ-NEXT: vpmovsxbd %xmm3, %zmm3
-; AVX512DQ-NEXT: vptestmd %zmm3, %zmm3, %k0
-; AVX512DQ-NEXT: kmovw %k0, 10(%rdi)
-; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kmovw %k0, 8(%rdi)
-; AVX512DQ-NEXT: vextracti128 $1, %ymm1, %xmm2
-; AVX512DQ-NEXT: vpmovsxbd %xmm2, %zmm2
-; AVX512DQ-NEXT: vptestmd %zmm2, %zmm2, %k0
-; AVX512DQ-NEXT: kmovw %k0, 6(%rdi)
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kmovw %k0, 4(%rdi)
-; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX512DQ-NEXT: vpmovsxbd %xmm1, %zmm1
-; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512DQ-NEXT: kmovw %k0, 2(%rdi)
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k1
+; AVX512DQ-NEXT: vpcmpgtb %ymm5, %ymm1, %ymm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k2
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k3
+; AVX512DQ-NEXT: vpcmpgtb %ymm6, %ymm2, %ymm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k4
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
-; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k0
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k5
+; AVX512DQ-NEXT: vpcmpgtb %ymm7, %ymm3, %ymm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm1
+; AVX512DQ-NEXT: vptestmd %zmm1, %zmm1, %k6
+; AVX512DQ-NEXT: vextracti128 $1, %ymm0, %xmm0
+; AVX512DQ-NEXT: vpmovsxbd %xmm0, %zmm0
+; AVX512DQ-NEXT: vptestmd %zmm0, %zmm0, %k7
+; AVX512DQ-NEXT: kmovw %k7, 14(%rdi)
+; AVX512DQ-NEXT: kmovw %k6, 12(%rdi)
+; AVX512DQ-NEXT: kmovw %k5, 10(%rdi)
+; AVX512DQ-NEXT: kmovw %k4, 8(%rdi)
+; AVX512DQ-NEXT: kmovw %k3, 6(%rdi)
+; AVX512DQ-NEXT: kmovw %k2, 4(%rdi)
+; AVX512DQ-NEXT: kmovw %k1, 2(%rdi)
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: movq %rdi, %rax
; AVX512DQ-NEXT: vzeroupper
@@ -6910,40 +6896,32 @@ define <32 x i1> @test_cmp_v32f64(<32 x
;
; AVX512F-LABEL: test_cmp_v32f64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0
-; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k1
-; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vcmpltpd %zmm2, %zmm6, %k0
; AVX512F-NEXT: vcmpltpd %zmm3, %zmm7, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: vcmpltpd %zmm0, %zmm4, %k0
+; AVX512F-NEXT: vcmpltpd %zmm1, %zmm5, %k2
+; AVX512F-NEXT: kunpckbw %k0, %k2, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v32f64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k0
-; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k1
-; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0
-; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vcmpltpd %zmm2, %zmm6, %k0
; AVX512DQ-NEXT: vcmpltpd %zmm3, %zmm7, %k1
; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0
+; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm4, %k1
+; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm5, %k2
+; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1
+; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_cmp_v32f64:
@@ -7561,40 +7539,32 @@ define <32 x i1> @test_cmp_v32i64(<32 x
;
; AVX512F-LABEL: test_cmp_v32i64:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0
-; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k1
-; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
-; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
; AVX512F-NEXT: vpcmpgtq %zmm7, %zmm3, %k1
; AVX512F-NEXT: kunpckbw %k0, %k1, %k1
+; AVX512F-NEXT: vpcmpgtq %zmm4, %zmm0, %k0
+; AVX512F-NEXT: vpcmpgtq %zmm5, %zmm1, %k2
+; AVX512F-NEXT: kunpckbw %k0, %k2, %k2
+; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z}
+; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512F-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512F-NEXT: retq
;
; AVX512DQ-LABEL: test_cmp_v32i64:
; AVX512DQ: # %bb.0:
-; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k0
-; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k1
-; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0
-; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
-; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpcmpgtq %zmm6, %zmm2, %k0
; AVX512DQ-NEXT: vpcmpgtq %zmm7, %zmm3, %k1
; AVX512DQ-NEXT: kunpckbw %k0, %k1, %k0
+; AVX512DQ-NEXT: vpcmpgtq %zmm4, %zmm0, %k1
+; AVX512DQ-NEXT: vpcmpgtq %zmm5, %zmm1, %k2
+; AVX512DQ-NEXT: kunpckbw %k1, %k2, %k1
+; AVX512DQ-NEXT: vpmovm2d %k1, %zmm0
+; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm1
; AVX512DQ-NEXT: vpmovdb %zmm1, %xmm1
; AVX512DQ-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpsllw $7, %ymm0, %ymm0
-; AVX512DQ-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512DQ-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: test_cmp_v32i64:
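For context, a minimal sketch of the IR pattern the test_cmp_v32i64 checks above appear to exercise, reconstructed from the <32 x i1> return type in the hunk header and the vpcmpgtq (signed greater-than) sequences; the argument names are illustrative and not taken from the test file:

define <32 x i1> @test_cmp_v32i64(<32 x i64> %a, <32 x i64> %b) {
  ; vpcmpgtq is a signed greater-than compare, so the corresponding IR is icmp sgt.
  %cmp = icmp sgt <32 x i64> %a, %b
  ret <32 x i1> %cmp
}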