[llvm] r314306 - [X86][SKX][KNL] Updated regression tests to use -mattr instead of -mcpu flag. NFC.
Gadi Haber via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 27 07:44:15 PDT 2017
Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll?rev=314306&r1=314305&r2=314306&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll Wed Sep 27 07:44:15 2017
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl,+avx512dq | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX
define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
@@ -19,8 +19,23 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi2:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi3:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi4:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi5:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi6:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi7:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -29,64 +44,64 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -95,8 +110,14 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -117,15 +138,30 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi3:
+; NoVLX-NEXT: .Lcfi8:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi4:
+; NoVLX-NEXT: .Lcfi9:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi5:
+; NoVLX-NEXT: .Lcfi10:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi11:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi12:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi13:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi14:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi15:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -134,64 +170,64 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -200,8 +236,14 @@ define zeroext i32 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -224,15 +266,30 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi6:
+; NoVLX-NEXT: .Lcfi16:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi7:
+; NoVLX-NEXT: .Lcfi17:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi8:
+; NoVLX-NEXT: .Lcfi18:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi19:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi20:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi21:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi22:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi23:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -242,64 +299,64 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -308,8 +365,14 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -333,15 +396,30 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi9:
+; NoVLX-NEXT: .Lcfi24:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi10:
+; NoVLX-NEXT: .Lcfi25:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi11:
+; NoVLX-NEXT: .Lcfi26:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi27:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi28:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi29:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi30:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi31:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -351,64 +429,64 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -417,8 +495,14 @@ define zeroext i32 @test_masked_vpcmpeqb
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -443,12 +527,12 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi12:
+; NoVLX-NEXT: .Lcfi32:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi13:
+; NoVLX-NEXT: .Lcfi33:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi14:
+; NoVLX-NEXT: .Lcfi34:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -457,20 +541,24 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi15:
+; NoVLX-NEXT: .Lcfi35:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi16:
+; NoVLX-NEXT: .Lcfi36:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi17:
+; NoVLX-NEXT: .Lcfi37:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi18:
+; NoVLX-NEXT: .Lcfi38:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi19:
+; NoVLX-NEXT: .Lcfi39:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -513,11 +601,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -529,15 +617,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -553,6 +637,7 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -573,12 +658,12 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi20:
+; NoVLX-NEXT: .Lcfi40:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi21:
+; NoVLX-NEXT: .Lcfi41:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi22:
+; NoVLX-NEXT: .Lcfi42:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -587,20 +672,24 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi23:
+; NoVLX-NEXT: .Lcfi43:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi24:
+; NoVLX-NEXT: .Lcfi44:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi25:
+; NoVLX-NEXT: .Lcfi45:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi26:
+; NoVLX-NEXT: .Lcfi46:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi27:
+; NoVLX-NEXT: .Lcfi47:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -643,11 +732,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -659,15 +748,11 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -683,6 +768,7 @@ define zeroext i64 @test_vpcmpeqb_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -705,12 +791,12 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi28:
+; NoVLX-NEXT: .Lcfi48:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi29:
+; NoVLX-NEXT: .Lcfi49:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi30:
+; NoVLX-NEXT: .Lcfi50:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -719,21 +805,25 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi31:
+; NoVLX-NEXT: .Lcfi51:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi32:
+; NoVLX-NEXT: .Lcfi52:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi33:
+; NoVLX-NEXT: .Lcfi53:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi34:
+; NoVLX-NEXT: .Lcfi54:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi35:
+; NoVLX-NEXT: .Lcfi55:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -776,11 +866,11 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -792,15 +882,11 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -816,6 +902,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -839,12 +926,12 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi36:
+; NoVLX-NEXT: .Lcfi56:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi37:
+; NoVLX-NEXT: .Lcfi57:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi38:
+; NoVLX-NEXT: .Lcfi58:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -853,21 +940,25 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi39:
+; NoVLX-NEXT: .Lcfi59:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi40:
+; NoVLX-NEXT: .Lcfi60:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi41:
+; NoVLX-NEXT: .Lcfi61:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi42:
+; NoVLX-NEXT: .Lcfi62:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi43:
+; NoVLX-NEXT: .Lcfi63:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -910,11 +1001,11 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -926,15 +1017,11 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -950,6 +1037,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -975,12 +1063,12 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi44:
+; NoVLX-NEXT: .Lcfi64:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi45:
+; NoVLX-NEXT: .Lcfi65:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi46:
+; NoVLX-NEXT: .Lcfi66:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1003,6 +1091,7 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -1024,12 +1113,12 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi47:
+; NoVLX-NEXT: .Lcfi67:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi48:
+; NoVLX-NEXT: .Lcfi68:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi49:
+; NoVLX-NEXT: .Lcfi69:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1052,6 +1141,7 @@ define zeroext i64 @test_vpcmpeqb_v32i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -1075,12 +1165,12 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi50:
+; NoVLX-NEXT: .Lcfi70:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi51:
+; NoVLX-NEXT: .Lcfi71:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi52:
+; NoVLX-NEXT: .Lcfi72:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -1112,6 +1202,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -1136,12 +1227,12 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi53:
+; NoVLX-NEXT: .Lcfi73:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi54:
+; NoVLX-NEXT: .Lcfi74:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi55:
+; NoVLX-NEXT: .Lcfi75:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -1173,6 +1264,7 @@ define zeroext i64 @test_masked_vpcmpeqb
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -1205,6 +1297,7 @@ define zeroext i16 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1233,6 +1326,7 @@ define zeroext i16 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1264,6 +1358,7 @@ define zeroext i16 @test_masked_vpcmpeqw
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1296,6 +1391,7 @@ define zeroext i16 @test_masked_vpcmpeqw
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1320,12 +1416,12 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi56:
+; NoVLX-NEXT: .Lcfi76:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi57:
+; NoVLX-NEXT: .Lcfi77:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi58:
+; NoVLX-NEXT: .Lcfi78:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -1375,6 +1471,7 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1395,12 +1492,12 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi59:
+; NoVLX-NEXT: .Lcfi79:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi60:
+; NoVLX-NEXT: .Lcfi80:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi61:
+; NoVLX-NEXT: .Lcfi81:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -1450,6 +1547,7 @@ define zeroext i32 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1472,12 +1570,12 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi62:
+; NoVLX-NEXT: .Lcfi82:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi63:
+; NoVLX-NEXT: .Lcfi83:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi64:
+; NoVLX-NEXT: .Lcfi84:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -1528,6 +1626,7 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1551,12 +1650,12 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi65:
+; NoVLX-NEXT: .Lcfi85:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi66:
+; NoVLX-NEXT: .Lcfi86:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi67:
+; NoVLX-NEXT: .Lcfi87:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -1607,6 +1706,7 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1631,12 +1731,12 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi68:
+; NoVLX-NEXT: .Lcfi88:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi69:
+; NoVLX-NEXT: .Lcfi89:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi70:
+; NoVLX-NEXT: .Lcfi90:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1644,43 +1744,43 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1691,6 +1791,7 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1711,12 +1812,12 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi71:
+; NoVLX-NEXT: .Lcfi91:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi72:
+; NoVLX-NEXT: .Lcfi92:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi73:
+; NoVLX-NEXT: .Lcfi93:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1724,43 +1825,43 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1771,6 +1872,7 @@ define zeroext i64 @test_vpcmpeqw_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1793,12 +1895,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi74:
+; NoVLX-NEXT: .Lcfi94:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi75:
+; NoVLX-NEXT: .Lcfi95:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi76:
+; NoVLX-NEXT: .Lcfi96:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1807,43 +1909,43 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1854,6 +1956,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1877,12 +1980,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi77:
+; NoVLX-NEXT: .Lcfi97:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi78:
+; NoVLX-NEXT: .Lcfi98:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi79:
+; NoVLX-NEXT: .Lcfi99:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -1891,43 +1994,43 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -1938,6 +2041,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -1963,15 +2067,30 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi80:
+; NoVLX-NEXT: .Lcfi100:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi81:
+; NoVLX-NEXT: .Lcfi101:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi82:
+; NoVLX-NEXT: .Lcfi102:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi103:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi104:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi105:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi106:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi107:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -1980,64 +2099,64 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -2046,8 +2165,14 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2069,15 +2194,30 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi83:
+; NoVLX-NEXT: .Lcfi108:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi84:
+; NoVLX-NEXT: .Lcfi109:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi85:
+; NoVLX-NEXT: .Lcfi110:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi111:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi112:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi113:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi114:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi115:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2086,64 +2226,64 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -2152,8 +2292,14 @@ define zeroext i32 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2177,15 +2323,30 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi86:
+; NoVLX-NEXT: .Lcfi116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi87:
+; NoVLX-NEXT: .Lcfi117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi88:
+; NoVLX-NEXT: .Lcfi118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi119:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi120:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi121:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi122:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi123:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2195,64 +2356,64 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -2261,8 +2422,14 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2287,15 +2454,30 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi89:
+; NoVLX-NEXT: .Lcfi124:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi90:
+; NoVLX-NEXT: .Lcfi125:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi91:
+; NoVLX-NEXT: .Lcfi126:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi127:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi128:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi129:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi130:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi131:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -2305,64 +2487,64 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -2371,8 +2553,14 @@ define zeroext i32 @test_masked_vpcmpeqw
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2398,12 +2586,12 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi92:
+; NoVLX-NEXT: .Lcfi132:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi93:
+; NoVLX-NEXT: .Lcfi133:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi94:
+; NoVLX-NEXT: .Lcfi134:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -2412,20 +2600,24 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi95:
+; NoVLX-NEXT: .Lcfi135:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi96:
+; NoVLX-NEXT: .Lcfi136:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi97:
+; NoVLX-NEXT: .Lcfi137:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi98:
+; NoVLX-NEXT: .Lcfi138:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi99:
+; NoVLX-NEXT: .Lcfi139:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -2468,11 +2660,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -2484,15 +2676,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2508,6 +2696,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2529,12 +2718,12 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi100:
+; NoVLX-NEXT: .Lcfi140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi101:
+; NoVLX-NEXT: .Lcfi141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi102:
+; NoVLX-NEXT: .Lcfi142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -2543,20 +2732,24 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi103:
+; NoVLX-NEXT: .Lcfi143:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi104:
+; NoVLX-NEXT: .Lcfi144:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi105:
+; NoVLX-NEXT: .Lcfi145:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi106:
+; NoVLX-NEXT: .Lcfi146:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi107:
+; NoVLX-NEXT: .Lcfi147:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -2599,11 +2792,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -2615,15 +2808,11 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2639,6 +2828,7 @@ define zeroext i64 @test_vpcmpeqw_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2662,12 +2852,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi108:
+; NoVLX-NEXT: .Lcfi148:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi109:
+; NoVLX-NEXT: .Lcfi149:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi110:
+; NoVLX-NEXT: .Lcfi150:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -2676,21 +2866,25 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi111:
+; NoVLX-NEXT: .Lcfi151:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi112:
+; NoVLX-NEXT: .Lcfi152:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi113:
+; NoVLX-NEXT: .Lcfi153:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi114:
+; NoVLX-NEXT: .Lcfi154:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi115:
+; NoVLX-NEXT: .Lcfi155:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -2733,11 +2927,11 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -2749,15 +2943,11 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2773,6 +2963,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2797,12 +2988,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi116:
+; NoVLX-NEXT: .Lcfi156:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi117:
+; NoVLX-NEXT: .Lcfi157:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi118:
+; NoVLX-NEXT: .Lcfi158:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -2811,21 +3002,25 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi119:
+; NoVLX-NEXT: .Lcfi159:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi120:
+; NoVLX-NEXT: .Lcfi160:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi121:
+; NoVLX-NEXT: .Lcfi161:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi122:
+; NoVLX-NEXT: .Lcfi162:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi123:
+; NoVLX-NEXT: .Lcfi163:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -2868,11 +3063,11 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -2884,15 +3079,11 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -2908,6 +3099,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -2933,58 +3125,62 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi124:
+; NoVLX-NEXT: .Lcfi164:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi125:
+; NoVLX-NEXT: .Lcfi165:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi126:
+; NoVLX-NEXT: .Lcfi166:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
@@ -2992,194 +3188,190 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm3, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
@@ -3263,6 +3455,7 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -3284,68 +3477,69 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi127:
+; NoVLX-NEXT: .Lcfi167:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi128:
+; NoVLX-NEXT: .Lcfi168:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi129:
+; NoVLX-NEXT: .Lcfi169:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -3353,8 +3547,7 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -3364,19 +3557,19 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
@@ -3529,6 +3722,7 @@ define zeroext i64 @test_vpcmpeqw_v32i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -3552,12 +3746,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi130:
+; NoVLX-NEXT: .Lcfi170:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi131:
+; NoVLX-NEXT: .Lcfi171:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi132:
+; NoVLX-NEXT: .Lcfi172:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -3568,12 +3762,17 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -3581,10 +3780,9 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
@@ -3602,40 +3800,39 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
@@ -3643,72 +3840,69 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm6
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -3717,13 +3911,18 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm4
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -3732,152 +3931,147 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vpcmpeqw %ymm4, %ymm1, %ymm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm1
-; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm4, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -3891,6 +4085,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -3915,12 +4110,12 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi133:
+; NoVLX-NEXT: .Lcfi173:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi134:
+; NoVLX-NEXT: .Lcfi174:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi135:
+; NoVLX-NEXT: .Lcfi175:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -3932,6 +4127,8 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
@@ -3944,20 +4141,19 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -3965,7 +4161,6 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
@@ -3988,174 +4183,174 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -4169,6 +4364,7 @@ define zeroext i64 @test_masked_vpcmpeqw
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -4210,8 +4406,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4228,6 +4424,7 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4265,8 +4462,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4283,6 +4480,7 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4305,6 +4503,7 @@ define zeroext i8 @test_masked_vpcmpeqd_
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -4316,14 +4515,13 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -4340,8 +4538,8 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4358,6 +4556,7 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4417,8 +4616,8 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4435,6 +4634,7 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4478,8 +4678,8 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4496,6 +4696,7 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4520,6 +4721,7 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -4531,14 +4733,13 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -4555,8 +4756,8 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -4573,6 +4774,7 @@ define zeroext i8 @test_masked_vpcmpeqd_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4615,8 +4817,8 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4633,6 +4835,7 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4669,8 +4872,8 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4687,6 +4890,7 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4709,6 +4913,7 @@ define zeroext i16 @test_masked_vpcmpeqd
;
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -4720,14 +4925,13 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -4743,8 +4947,8 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4761,6 +4965,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4819,8 +5024,8 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4837,6 +5042,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4879,8 +5085,8 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4897,6 +5103,7 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4921,6 +5128,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -4932,14 +5140,13 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -4955,8 +5162,8 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -4973,6 +5180,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -4999,12 +5207,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi136:
+; NoVLX-NEXT: .Lcfi176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi137:
+; NoVLX-NEXT: .Lcfi177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi138:
+; NoVLX-NEXT: .Lcfi178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -5022,6 +5230,7 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5042,12 +5251,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi139:
+; NoVLX-NEXT: .Lcfi179:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi140:
+; NoVLX-NEXT: .Lcfi180:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi141:
+; NoVLX-NEXT: .Lcfi181:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -5065,6 +5274,7 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5087,15 +5297,16 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi142:
+; NoVLX-NEXT: .Lcfi182:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi143:
+; NoVLX-NEXT: .Lcfi183:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi144:
+; NoVLX-NEXT: .Lcfi184:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -5107,14 +5318,13 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -5128,6 +5338,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5152,12 +5363,12 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi145:
+; NoVLX-NEXT: .Lcfi185:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi146:
+; NoVLX-NEXT: .Lcfi186:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi147:
+; NoVLX-NEXT: .Lcfi187:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -5193,6 +5404,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5218,12 +5430,12 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi148:
+; NoVLX-NEXT: .Lcfi188:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi149:
+; NoVLX-NEXT: .Lcfi189:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi150:
+; NoVLX-NEXT: .Lcfi190:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -5242,6 +5454,7 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5265,16 +5478,17 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi151:
+; NoVLX-NEXT: .Lcfi191:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi152:
+; NoVLX-NEXT: .Lcfi192:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi153:
+; NoVLX-NEXT: .Lcfi193:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -5286,14 +5500,13 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -5307,6 +5520,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5333,20 +5547,20 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi154:
+; NoVLX-NEXT: .Lcfi194:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi155:
+; NoVLX-NEXT: .Lcfi195:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi156:
+; NoVLX-NEXT: .Lcfi196:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5362,6 +5576,7 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5382,20 +5597,20 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi157:
+; NoVLX-NEXT: .Lcfi197:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi158:
+; NoVLX-NEXT: .Lcfi198:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi159:
+; NoVLX-NEXT: .Lcfi199:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5411,6 +5626,7 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5433,18 +5649,19 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi160:
+; NoVLX-NEXT: .Lcfi200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi161:
+; NoVLX-NEXT: .Lcfi201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi162:
+; NoVLX-NEXT: .Lcfi202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -5457,14 +5674,13 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5480,6 +5696,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5504,18 +5721,19 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi163:
+; NoVLX-NEXT: .Lcfi203:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi164:
+; NoVLX-NEXT: .Lcfi204:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi165:
+; NoVLX-NEXT: .Lcfi205:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -5528,14 +5746,13 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5551,6 +5768,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5576,12 +5794,12 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi166:
+; NoVLX-NEXT: .Lcfi206:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi167:
+; NoVLX-NEXT: .Lcfi207:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi168:
+; NoVLX-NEXT: .Lcfi208:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -5589,8 +5807,8 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5606,6 +5824,7 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5629,12 +5848,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi169:
+; NoVLX-NEXT: .Lcfi209:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi170:
+; NoVLX-NEXT: .Lcfi210:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi171:
+; NoVLX-NEXT: .Lcfi211:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -5642,6 +5861,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -5654,14 +5874,13 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -5677,6 +5896,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -5711,6 +5931,7 @@ define zeroext i16 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5739,6 +5960,7 @@ define zeroext i16 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5770,6 +5992,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5802,6 +6025,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5834,6 +6058,7 @@ define zeroext i16 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5866,6 +6091,7 @@ define zeroext i16 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5892,12 +6118,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi172:
+; NoVLX-NEXT: .Lcfi212:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi173:
+; NoVLX-NEXT: .Lcfi213:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi174:
+; NoVLX-NEXT: .Lcfi214:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -5946,6 +6172,7 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -5967,12 +6194,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi175:
+; NoVLX-NEXT: .Lcfi215:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi176:
+; NoVLX-NEXT: .Lcfi216:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi177:
+; NoVLX-NEXT: .Lcfi217:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -6021,6 +6248,7 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6044,12 +6272,12 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi178:
+; NoVLX-NEXT: .Lcfi218:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi179:
+; NoVLX-NEXT: .Lcfi219:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi180:
+; NoVLX-NEXT: .Lcfi220:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -6099,6 +6327,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6123,12 +6352,12 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi181:
+; NoVLX-NEXT: .Lcfi221:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi182:
+; NoVLX-NEXT: .Lcfi222:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi183:
+; NoVLX-NEXT: .Lcfi223:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -6178,6 +6407,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6203,12 +6433,12 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi184:
+; NoVLX-NEXT: .Lcfi224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi185:
+; NoVLX-NEXT: .Lcfi225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi186:
+; NoVLX-NEXT: .Lcfi226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -6257,6 +6487,7 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6281,12 +6512,12 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi187:
+; NoVLX-NEXT: .Lcfi227:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi188:
+; NoVLX-NEXT: .Lcfi228:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi189:
+; NoVLX-NEXT: .Lcfi229:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -6336,6 +6567,7 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6362,55 +6594,55 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi190:
+; NoVLX-NEXT: .Lcfi230:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi191:
+; NoVLX-NEXT: .Lcfi231:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi192:
+; NoVLX-NEXT: .Lcfi232:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6421,6 +6653,7 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6442,55 +6675,55 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi193:
+; NoVLX-NEXT: .Lcfi233:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi194:
+; NoVLX-NEXT: .Lcfi234:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi195:
+; NoVLX-NEXT: .Lcfi235:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6501,6 +6734,7 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6524,12 +6758,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi196:
+; NoVLX-NEXT: .Lcfi236:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi197:
+; NoVLX-NEXT: .Lcfi237:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi198:
+; NoVLX-NEXT: .Lcfi238:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -6537,43 +6771,43 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6584,6 +6818,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6608,12 +6843,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi199:
+; NoVLX-NEXT: .Lcfi239:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi200:
+; NoVLX-NEXT: .Lcfi240:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi201:
+; NoVLX-NEXT: .Lcfi241:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -6621,43 +6856,43 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6668,6 +6903,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6693,55 +6929,55 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi202:
+; NoVLX-NEXT: .Lcfi242:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi203:
+; NoVLX-NEXT: .Lcfi243:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi204:
+; NoVLX-NEXT: .Lcfi244:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6752,6 +6988,7 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6776,12 +7013,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi205:
+; NoVLX-NEXT: .Lcfi245:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi206:
+; NoVLX-NEXT: .Lcfi246:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi207:
+; NoVLX-NEXT: .Lcfi247:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -6789,43 +7026,43 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -6836,6 +7073,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -6862,78 +7100,93 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi208:
+; NoVLX-NEXT: .Lcfi248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi209:
+; NoVLX-NEXT: .Lcfi249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi210:
+; NoVLX-NEXT: .Lcfi250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi251:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi252:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi253:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi254:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi255:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -6942,8 +7195,14 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -6965,78 +7224,93 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi211:
+; NoVLX-NEXT: .Lcfi256:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi212:
+; NoVLX-NEXT: .Lcfi257:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi213:
+; NoVLX-NEXT: .Lcfi258:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi259:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi260:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi261:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi262:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi263:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -7045,8 +7319,14 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7070,79 +7350,94 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi214:
+; NoVLX-NEXT: .Lcfi264:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi215:
+; NoVLX-NEXT: .Lcfi265:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi216:
+; NoVLX-NEXT: .Lcfi266:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi267:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi268:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi269:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi270:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi271:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -7151,8 +7446,14 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7177,79 +7478,94 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi217:
+; NoVLX-NEXT: .Lcfi272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi218:
+; NoVLX-NEXT: .Lcfi273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi219:
+; NoVLX-NEXT: .Lcfi274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi275:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi276:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi277:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi278:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi279:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -7258,8 +7574,14 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7285,78 +7607,93 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi220:
+; NoVLX-NEXT: .Lcfi280:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi221:
+; NoVLX-NEXT: .Lcfi281:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi222:
+; NoVLX-NEXT: .Lcfi282:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi283:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi284:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi285:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi286:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi287:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -7365,8 +7702,14 @@ define zeroext i32 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7391,79 +7734,94 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi223:
+; NoVLX-NEXT: .Lcfi288:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi224:
+; NoVLX-NEXT: .Lcfi289:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi225:
+; NoVLX-NEXT: .Lcfi290:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi291:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi292:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi293:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi294:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi295:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -7472,8 +7830,14 @@ define zeroext i32 @test_masked_vpcmpeqd
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7500,12 +7864,12 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi226:
+; NoVLX-NEXT: .Lcfi296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi227:
+; NoVLX-NEXT: .Lcfi297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi228:
+; NoVLX-NEXT: .Lcfi298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -7514,17 +7878,21 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi229:
+; NoVLX-NEXT: .Lcfi299:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi230:
+; NoVLX-NEXT: .Lcfi300:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi231:
+; NoVLX-NEXT: .Lcfi301:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi232:
+; NoVLX-NEXT: .Lcfi302:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi233:
+; NoVLX-NEXT: .Lcfi303:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -7567,11 +7935,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -7583,15 +7951,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -7607,6 +7971,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7628,12 +7993,12 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi234:
+; NoVLX-NEXT: .Lcfi304:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi235:
+; NoVLX-NEXT: .Lcfi305:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi236:
+; NoVLX-NEXT: .Lcfi306:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -7642,17 +8007,21 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi237:
+; NoVLX-NEXT: .Lcfi307:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi238:
+; NoVLX-NEXT: .Lcfi308:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi239:
+; NoVLX-NEXT: .Lcfi309:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi240:
+; NoVLX-NEXT: .Lcfi310:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi241:
+; NoVLX-NEXT: .Lcfi311:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -7695,11 +8064,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -7711,15 +8080,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -7735,6 +8100,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7758,12 +8124,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi242:
+; NoVLX-NEXT: .Lcfi312:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi243:
+; NoVLX-NEXT: .Lcfi313:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi244:
+; NoVLX-NEXT: .Lcfi314:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -7772,18 +8138,22 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi245:
+; NoVLX-NEXT: .Lcfi315:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi246:
+; NoVLX-NEXT: .Lcfi316:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi247:
+; NoVLX-NEXT: .Lcfi317:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi248:
+; NoVLX-NEXT: .Lcfi318:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi249:
+; NoVLX-NEXT: .Lcfi319:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -7826,11 +8196,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -7842,15 +8212,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -7866,6 +8232,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -7890,12 +8257,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi250:
+; NoVLX-NEXT: .Lcfi320:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi251:
+; NoVLX-NEXT: .Lcfi321:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi252:
+; NoVLX-NEXT: .Lcfi322:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -7904,18 +8271,22 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi253:
+; NoVLX-NEXT: .Lcfi323:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi254:
+; NoVLX-NEXT: .Lcfi324:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi255:
+; NoVLX-NEXT: .Lcfi325:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi256:
+; NoVLX-NEXT: .Lcfi326:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi257:
+; NoVLX-NEXT: .Lcfi327:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -7958,11 +8329,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -7974,15 +8345,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -7998,6 +8365,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -8023,12 +8391,12 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi258:
+; NoVLX-NEXT: .Lcfi328:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi259:
+; NoVLX-NEXT: .Lcfi329:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi260:
+; NoVLX-NEXT: .Lcfi330:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -8037,17 +8405,21 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi261:
+; NoVLX-NEXT: .Lcfi331:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi262:
+; NoVLX-NEXT: .Lcfi332:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi263:
+; NoVLX-NEXT: .Lcfi333:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi264:
+; NoVLX-NEXT: .Lcfi334:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi265:
+; NoVLX-NEXT: .Lcfi335:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -8090,11 +8462,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -8106,15 +8478,11 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -8130,6 +8498,7 @@ define zeroext i64 @test_vpcmpeqd_v16i1_
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -8154,12 +8523,12 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi266:
+; NoVLX-NEXT: .Lcfi336:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi267:
+; NoVLX-NEXT: .Lcfi337:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi268:
+; NoVLX-NEXT: .Lcfi338:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -8168,18 +8537,22 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi269:
+; NoVLX-NEXT: .Lcfi339:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi270:
+; NoVLX-NEXT: .Lcfi340:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi271:
+; NoVLX-NEXT: .Lcfi341:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi272:
+; NoVLX-NEXT: .Lcfi342:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi273:
+; NoVLX-NEXT: .Lcfi343:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -8222,11 +8595,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -8238,15 +8611,11 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -8262,6 +8631,7 @@ define zeroext i64 @test_masked_vpcmpeqd
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -8294,6 +8664,7 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8321,6 +8692,7 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8343,6 +8715,7 @@ define zeroext i4 @test_masked_vpcmpeqq_
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -8350,16 +8723,16 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8401,6 +8774,7 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8434,6 +8808,7 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8458,6 +8833,7 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -8465,16 +8841,16 @@ define zeroext i4 @test_masked_vpcmpeqq_
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8520,6 +8896,7 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8559,6 +8936,7 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8610,6 +8988,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8663,6 +9042,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8708,6 +9088,7 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8761,6 +9142,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8805,6 +9187,7 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8843,6 +9226,7 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8893,6 +9277,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8945,6 +9330,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -8989,6 +9375,7 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9041,6 +9428,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9067,12 +9455,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi274:
+; NoVLX-NEXT: .Lcfi344:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi275:
+; NoVLX-NEXT: .Lcfi345:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi276:
+; NoVLX-NEXT: .Lcfi346:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -9090,6 +9478,7 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9110,12 +9499,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi277:
+; NoVLX-NEXT: .Lcfi347:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi278:
+; NoVLX-NEXT: .Lcfi348:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi279:
+; NoVLX-NEXT: .Lcfi349:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -9133,6 +9522,7 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9155,15 +9545,16 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi280:
+; NoVLX-NEXT: .Lcfi350:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi281:
+; NoVLX-NEXT: .Lcfi351:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi282:
+; NoVLX-NEXT: .Lcfi352:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -9171,10 +9562,9 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -9188,6 +9578,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9212,12 +9603,12 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi283:
+; NoVLX-NEXT: .Lcfi353:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi284:
+; NoVLX-NEXT: .Lcfi354:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi285:
+; NoVLX-NEXT: .Lcfi355:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -9245,6 +9636,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9270,12 +9662,12 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi286:
+; NoVLX-NEXT: .Lcfi356:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi287:
+; NoVLX-NEXT: .Lcfi357:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi288:
+; NoVLX-NEXT: .Lcfi358:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -9294,6 +9686,7 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9317,16 +9710,17 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi289:
+; NoVLX-NEXT: .Lcfi359:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi290:
+; NoVLX-NEXT: .Lcfi360:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi291:
+; NoVLX-NEXT: .Lcfi361:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -9334,10 +9728,9 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -9351,6 +9744,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9377,20 +9771,20 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi292:
+; NoVLX-NEXT: .Lcfi362:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi293:
+; NoVLX-NEXT: .Lcfi363:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi294:
+; NoVLX-NEXT: .Lcfi364:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9406,6 +9800,7 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9426,20 +9821,20 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi295:
+; NoVLX-NEXT: .Lcfi365:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi296:
+; NoVLX-NEXT: .Lcfi366:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi297:
+; NoVLX-NEXT: .Lcfi367:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9455,6 +9850,7 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9477,12 +9873,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi298:
+; NoVLX-NEXT: .Lcfi368:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi299:
+; NoVLX-NEXT: .Lcfi369:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi300:
+; NoVLX-NEXT: .Lcfi370:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -9499,8 +9895,8 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9516,6 +9912,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9540,12 +9937,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi301:
+; NoVLX-NEXT: .Lcfi371:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi302:
+; NoVLX-NEXT: .Lcfi372:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi303:
+; NoVLX-NEXT: .Lcfi373:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -9562,8 +9959,8 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9579,6 +9976,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9604,12 +10002,12 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi304:
+; NoVLX-NEXT: .Lcfi374:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi305:
+; NoVLX-NEXT: .Lcfi375:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi306:
+; NoVLX-NEXT: .Lcfi376:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -9617,8 +10015,8 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9634,6 +10032,7 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9657,12 +10056,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi307:
+; NoVLX-NEXT: .Lcfi377:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi308:
+; NoVLX-NEXT: .Lcfi378:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi309:
+; NoVLX-NEXT: .Lcfi379:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -9680,8 +10079,8 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -9697,6 +10096,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -9742,8 +10142,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -9760,6 +10160,7 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -9799,8 +10200,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -9817,6 +10218,7 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -9841,6 +10243,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -9858,7 +10261,6 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -9876,8 +10278,8 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -9894,6 +10296,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -9920,6 +10323,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -9937,7 +10341,6 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -9955,8 +10358,8 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -9973,6 +10376,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10018,8 +10422,8 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -10036,6 +10440,7 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10062,6 +10467,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10079,7 +10485,6 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -10097,8 +10502,8 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -10115,6 +10520,7 @@ define zeroext i8 @test_masked_vpcmpeqq_
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10159,8 +10565,8 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10177,6 +10583,7 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10215,8 +10622,8 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10233,6 +10640,7 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10257,6 +10665,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10274,7 +10683,6 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -10291,8 +10699,8 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10309,6 +10717,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10335,6 +10744,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10352,7 +10762,6 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -10369,8 +10778,8 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10387,6 +10796,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10431,8 +10841,8 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10449,6 +10859,7 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10475,6 +10886,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10492,7 +10904,6 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -10509,8 +10920,8 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -10527,6 +10938,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10554,12 +10966,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi310:
+; NoVLX-NEXT: .Lcfi380:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi311:
+; NoVLX-NEXT: .Lcfi381:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi312:
+; NoVLX-NEXT: .Lcfi382:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -10578,6 +10990,7 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10599,12 +11012,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi313:
+; NoVLX-NEXT: .Lcfi383:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi314:
+; NoVLX-NEXT: .Lcfi384:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi315:
+; NoVLX-NEXT: .Lcfi385:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -10623,6 +11036,7 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10646,16 +11060,17 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi316:
+; NoVLX-NEXT: .Lcfi386:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi317:
+; NoVLX-NEXT: .Lcfi387:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi318:
+; NoVLX-NEXT: .Lcfi388:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10673,7 +11088,6 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -10688,6 +11102,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10713,16 +11128,17 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi319:
+; NoVLX-NEXT: .Lcfi389:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi320:
+; NoVLX-NEXT: .Lcfi390:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi321:
+; NoVLX-NEXT: .Lcfi391:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10740,7 +11156,6 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -10755,6 +11170,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10781,12 +11197,12 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi322:
+; NoVLX-NEXT: .Lcfi392:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi323:
+; NoVLX-NEXT: .Lcfi393:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi324:
+; NoVLX-NEXT: .Lcfi394:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -10806,6 +11222,7 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10830,17 +11247,18 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi325:
+; NoVLX-NEXT: .Lcfi395:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi326:
+; NoVLX-NEXT: .Lcfi396:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi327:
+; NoVLX-NEXT: .Lcfi397:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -10858,7 +11276,6 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -10873,6 +11290,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10900,12 +11318,12 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi328:
+; NoVLX-NEXT: .Lcfi398:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi329:
+; NoVLX-NEXT: .Lcfi399:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi330:
+; NoVLX-NEXT: .Lcfi400:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -10913,8 +11331,8 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -10930,6 +11348,7 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -10951,12 +11370,12 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi331:
+; NoVLX-NEXT: .Lcfi401:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi332:
+; NoVLX-NEXT: .Lcfi402:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi333:
+; NoVLX-NEXT: .Lcfi403:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -10964,8 +11383,8 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -10981,6 +11400,7 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -11004,12 +11424,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi334:
+; NoVLX-NEXT: .Lcfi404:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi335:
+; NoVLX-NEXT: .Lcfi405:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi336:
+; NoVLX-NEXT: .Lcfi406:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -11017,6 +11437,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -11029,14 +11450,13 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -11052,6 +11472,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -11077,12 +11498,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi337:
+; NoVLX-NEXT: .Lcfi407:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi338:
+; NoVLX-NEXT: .Lcfi408:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi339:
+; NoVLX-NEXT: .Lcfi409:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -11090,6 +11511,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -11102,14 +11524,13 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -11125,6 +11546,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -11151,12 +11573,12 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi340:
+; NoVLX-NEXT: .Lcfi410:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi341:
+; NoVLX-NEXT: .Lcfi411:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi342:
+; NoVLX-NEXT: .Lcfi412:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -11165,8 +11587,8 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -11182,6 +11604,7 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -11206,12 +11629,12 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi343:
+; NoVLX-NEXT: .Lcfi413:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi344:
+; NoVLX-NEXT: .Lcfi414:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi345:
+; NoVLX-NEXT: .Lcfi415:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -11220,6 +11643,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -11232,14 +11656,13 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -11255,6 +11678,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -11285,6 +11709,7 @@ define zeroext i16 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11309,6 +11734,7 @@ define zeroext i16 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11336,6 +11762,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11364,6 +11791,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11392,6 +11820,7 @@ define zeroext i16 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11420,6 +11849,7 @@ define zeroext i16 @test_masked_vpcmpeqq
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11446,12 +11876,12 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi346:
+; NoVLX-NEXT: .Lcfi416:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi347:
+; NoVLX-NEXT: .Lcfi417:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi348:
+; NoVLX-NEXT: .Lcfi418:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11498,6 +11928,7 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11519,12 +11950,12 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi349:
+; NoVLX-NEXT: .Lcfi419:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi350:
+; NoVLX-NEXT: .Lcfi420:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi351:
+; NoVLX-NEXT: .Lcfi421:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11571,6 +12002,7 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11594,12 +12026,12 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi352:
+; NoVLX-NEXT: .Lcfi422:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi353:
+; NoVLX-NEXT: .Lcfi423:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi354:
+; NoVLX-NEXT: .Lcfi424:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11647,6 +12079,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11671,12 +12104,12 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi355:
+; NoVLX-NEXT: .Lcfi425:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi356:
+; NoVLX-NEXT: .Lcfi426:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi357:
+; NoVLX-NEXT: .Lcfi427:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11724,6 +12157,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11749,12 +12183,12 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi358:
+; NoVLX-NEXT: .Lcfi428:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi359:
+; NoVLX-NEXT: .Lcfi429:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi360:
+; NoVLX-NEXT: .Lcfi430:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11801,6 +12235,7 @@ define zeroext i32 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11825,12 +12260,12 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi361:
+; NoVLX-NEXT: .Lcfi431:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi362:
+; NoVLX-NEXT: .Lcfi432:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi363:
+; NoVLX-NEXT: .Lcfi433:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -11878,6 +12313,7 @@ define zeroext i32 @test_masked_vpcmpeqq
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11904,53 +12340,53 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi364:
+; NoVLX-NEXT: .Lcfi434:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi365:
+; NoVLX-NEXT: .Lcfi435:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi366:
+; NoVLX-NEXT: .Lcfi436:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -11961,6 +12397,7 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -11982,54 +12419,54 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi367:
+; NoVLX-NEXT: .Lcfi437:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi368:
+; NoVLX-NEXT: .Lcfi438:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi369:
+; NoVLX-NEXT: .Lcfi439:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -12039,6 +12476,7 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -12062,54 +12500,54 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi370:
+; NoVLX-NEXT: .Lcfi440:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi371:
+; NoVLX-NEXT: .Lcfi441:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi372:
+; NoVLX-NEXT: .Lcfi442:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12120,6 +12558,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -12144,54 +12583,54 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi373:
+; NoVLX-NEXT: .Lcfi443:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi374:
+; NoVLX-NEXT: .Lcfi444:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi375:
+; NoVLX-NEXT: .Lcfi445:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12202,6 +12641,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -12227,53 +12667,53 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi376:
+; NoVLX-NEXT: .Lcfi446:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi377:
+; NoVLX-NEXT: .Lcfi447:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi378:
+; NoVLX-NEXT: .Lcfi448:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12284,6 +12724,7 @@ define zeroext i64 @test_vpcmpeqq_v8i1_v
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -12308,54 +12749,54 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi379:
+; NoVLX-NEXT: .Lcfi449:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi380:
+; NoVLX-NEXT: .Lcfi450:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi381:
+; NoVLX-NEXT: .Lcfi451:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12366,6 +12807,7 @@ define zeroext i64 @test_masked_vpcmpeqq
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -12391,15 +12833,30 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi382:
+; NoVLX-NEXT: .Lcfi452:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi383:
+; NoVLX-NEXT: .Lcfi453:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi384:
+; NoVLX-NEXT: .Lcfi454:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi455:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi456:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi457:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi458:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi459:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -12408,64 +12865,64 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -12474,8 +12931,14 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -12496,15 +12959,30 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi385:
+; NoVLX-NEXT: .Lcfi460:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi386:
+; NoVLX-NEXT: .Lcfi461:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi387:
+; NoVLX-NEXT: .Lcfi462:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi463:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi464:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi465:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi466:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi467:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -12513,64 +12991,64 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -12579,8 +13057,14 @@ define zeroext i32 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -12603,15 +13087,30 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi388:
+; NoVLX-NEXT: .Lcfi468:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi389:
+; NoVLX-NEXT: .Lcfi469:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi390:
+; NoVLX-NEXT: .Lcfi470:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi471:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi472:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi473:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi474:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi475:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -12621,64 +13120,64 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -12687,8 +13186,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -12712,15 +13217,30 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi391:
+; NoVLX-NEXT: .Lcfi476:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi392:
+; NoVLX-NEXT: .Lcfi477:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi393:
+; NoVLX-NEXT: .Lcfi478:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi479:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi480:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi481:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi482:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi483:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -12730,64 +13250,64 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -12796,8 +13316,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -12822,12 +13348,12 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi394:
+; NoVLX-NEXT: .Lcfi484:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi395:
+; NoVLX-NEXT: .Lcfi485:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi396:
+; NoVLX-NEXT: .Lcfi486:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -12836,20 +13362,24 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi397:
+; NoVLX-NEXT: .Lcfi487:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi398:
+; NoVLX-NEXT: .Lcfi488:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi399:
+; NoVLX-NEXT: .Lcfi489:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi400:
+; NoVLX-NEXT: .Lcfi490:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi401:
+; NoVLX-NEXT: .Lcfi491:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -12892,11 +13422,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -12908,15 +13438,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -12932,6 +13458,7 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -12952,12 +13479,12 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi402:
+; NoVLX-NEXT: .Lcfi492:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi403:
+; NoVLX-NEXT: .Lcfi493:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi404:
+; NoVLX-NEXT: .Lcfi494:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -12966,20 +13493,24 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi405:
+; NoVLX-NEXT: .Lcfi495:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi406:
+; NoVLX-NEXT: .Lcfi496:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi407:
+; NoVLX-NEXT: .Lcfi497:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi408:
+; NoVLX-NEXT: .Lcfi498:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi409:
+; NoVLX-NEXT: .Lcfi499:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -13022,11 +13553,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -13038,15 +13569,11 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -13062,6 +13589,7 @@ define zeroext i64 @test_vpcmpsgtb_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -13084,12 +13612,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi410:
+; NoVLX-NEXT: .Lcfi500:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi411:
+; NoVLX-NEXT: .Lcfi501:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi412:
+; NoVLX-NEXT: .Lcfi502:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -13098,21 +13626,25 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi413:
+; NoVLX-NEXT: .Lcfi503:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi414:
+; NoVLX-NEXT: .Lcfi504:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi415:
+; NoVLX-NEXT: .Lcfi505:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi416:
+; NoVLX-NEXT: .Lcfi506:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi417:
+; NoVLX-NEXT: .Lcfi507:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -13155,11 +13687,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -13171,15 +13703,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -13195,6 +13723,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -13218,12 +13747,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi418:
+; NoVLX-NEXT: .Lcfi508:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi419:
+; NoVLX-NEXT: .Lcfi509:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi420:
+; NoVLX-NEXT: .Lcfi510:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -13232,21 +13761,25 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi421:
+; NoVLX-NEXT: .Lcfi511:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi422:
+; NoVLX-NEXT: .Lcfi512:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi423:
+; NoVLX-NEXT: .Lcfi513:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi424:
+; NoVLX-NEXT: .Lcfi514:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi425:
+; NoVLX-NEXT: .Lcfi515:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -13289,11 +13822,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -13305,15 +13838,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -13329,6 +13858,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -13354,12 +13884,12 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi426:
+; NoVLX-NEXT: .Lcfi516:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi427:
+; NoVLX-NEXT: .Lcfi517:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi428:
+; NoVLX-NEXT: .Lcfi518:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -13382,6 +13912,7 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -13403,12 +13934,12 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi429:
+; NoVLX-NEXT: .Lcfi519:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi430:
+; NoVLX-NEXT: .Lcfi520:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi431:
+; NoVLX-NEXT: .Lcfi521:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -13431,6 +13962,7 @@ define zeroext i64 @test_vpcmpsgtb_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -13454,12 +13986,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi432:
+; NoVLX-NEXT: .Lcfi522:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi433:
+; NoVLX-NEXT: .Lcfi523:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi434:
+; NoVLX-NEXT: .Lcfi524:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -13491,6 +14023,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -13515,12 +14048,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi435:
+; NoVLX-NEXT: .Lcfi525:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi436:
+; NoVLX-NEXT: .Lcfi526:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi437:
+; NoVLX-NEXT: .Lcfi527:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -13552,6 +14085,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -13584,6 +14118,7 @@ define zeroext i16 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13612,6 +14147,7 @@ define zeroext i16 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13643,6 +14179,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13675,6 +14212,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13699,12 +14237,12 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi438:
+; NoVLX-NEXT: .Lcfi528:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi439:
+; NoVLX-NEXT: .Lcfi529:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi440:
+; NoVLX-NEXT: .Lcfi530:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -13754,6 +14292,7 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13774,12 +14313,12 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi441:
+; NoVLX-NEXT: .Lcfi531:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi442:
+; NoVLX-NEXT: .Lcfi532:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi443:
+; NoVLX-NEXT: .Lcfi533:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -13829,6 +14368,7 @@ define zeroext i32 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13851,12 +14391,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi444:
+; NoVLX-NEXT: .Lcfi534:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi445:
+; NoVLX-NEXT: .Lcfi535:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi446:
+; NoVLX-NEXT: .Lcfi536:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -13907,6 +14447,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -13930,12 +14471,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi447:
+; NoVLX-NEXT: .Lcfi537:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi448:
+; NoVLX-NEXT: .Lcfi538:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi449:
+; NoVLX-NEXT: .Lcfi539:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -13986,6 +14527,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -14010,12 +14552,12 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi450:
+; NoVLX-NEXT: .Lcfi540:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi451:
+; NoVLX-NEXT: .Lcfi541:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi452:
+; NoVLX-NEXT: .Lcfi542:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -14023,43 +14565,43 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14070,6 +14612,7 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -14090,12 +14633,12 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi453:
+; NoVLX-NEXT: .Lcfi543:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi454:
+; NoVLX-NEXT: .Lcfi544:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi455:
+; NoVLX-NEXT: .Lcfi545:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -14103,43 +14646,43 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14150,6 +14693,7 @@ define zeroext i64 @test_vpcmpsgtw_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -14172,12 +14716,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi456:
+; NoVLX-NEXT: .Lcfi546:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi457:
+; NoVLX-NEXT: .Lcfi547:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi458:
+; NoVLX-NEXT: .Lcfi548:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -14186,43 +14730,43 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14233,6 +14777,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -14256,12 +14801,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi459:
+; NoVLX-NEXT: .Lcfi549:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi460:
+; NoVLX-NEXT: .Lcfi550:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi461:
+; NoVLX-NEXT: .Lcfi551:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -14270,43 +14815,43 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14317,6 +14862,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -14342,15 +14888,30 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi462:
+; NoVLX-NEXT: .Lcfi552:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi463:
+; NoVLX-NEXT: .Lcfi553:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi464:
+; NoVLX-NEXT: .Lcfi554:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi555:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi556:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi557:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi558:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi559:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -14359,64 +14920,64 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -14425,8 +14986,14 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -14448,15 +15015,30 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi465:
+; NoVLX-NEXT: .Lcfi560:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi466:
+; NoVLX-NEXT: .Lcfi561:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi467:
+; NoVLX-NEXT: .Lcfi562:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi563:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi564:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi565:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi566:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi567:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -14465,64 +15047,64 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -14531,8 +15113,14 @@ define zeroext i32 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -14556,15 +15144,30 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi468:
+; NoVLX-NEXT: .Lcfi568:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi469:
+; NoVLX-NEXT: .Lcfi569:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi470:
+; NoVLX-NEXT: .Lcfi570:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi571:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi572:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi573:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi574:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi575:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -14574,74 +15177,80 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -14666,15 +15275,30 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi471:
+; NoVLX-NEXT: .Lcfi576:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi472:
+; NoVLX-NEXT: .Lcfi577:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi473:
+; NoVLX-NEXT: .Lcfi578:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi579:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi580:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi581:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi582:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi583:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
@@ -14684,64 +15308,64 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -14750,8 +15374,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -14777,12 +15407,12 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi474:
+; NoVLX-NEXT: .Lcfi584:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi475:
+; NoVLX-NEXT: .Lcfi585:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi476:
+; NoVLX-NEXT: .Lcfi586:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -14791,20 +15421,24 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi477:
+; NoVLX-NEXT: .Lcfi587:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi478:
+; NoVLX-NEXT: .Lcfi588:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi479:
+; NoVLX-NEXT: .Lcfi589:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi480:
+; NoVLX-NEXT: .Lcfi590:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi481:
+; NoVLX-NEXT: .Lcfi591:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -14847,11 +15481,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -14863,15 +15497,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -14887,6 +15517,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -14908,12 +15539,12 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi482:
+; NoVLX-NEXT: .Lcfi592:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi483:
+; NoVLX-NEXT: .Lcfi593:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi484:
+; NoVLX-NEXT: .Lcfi594:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -14922,20 +15553,24 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi485:
+; NoVLX-NEXT: .Lcfi595:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi486:
+; NoVLX-NEXT: .Lcfi596:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi487:
+; NoVLX-NEXT: .Lcfi597:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi488:
+; NoVLX-NEXT: .Lcfi598:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi489:
+; NoVLX-NEXT: .Lcfi599:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -14978,11 +15613,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -14994,15 +15629,11 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15018,6 +15649,7 @@ define zeroext i64 @test_vpcmpsgtw_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -15041,12 +15673,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi490:
+; NoVLX-NEXT: .Lcfi600:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi491:
+; NoVLX-NEXT: .Lcfi601:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi492:
+; NoVLX-NEXT: .Lcfi602:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -15055,21 +15687,25 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi493:
+; NoVLX-NEXT: .Lcfi603:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi494:
+; NoVLX-NEXT: .Lcfi604:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi495:
+; NoVLX-NEXT: .Lcfi605:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi496:
+; NoVLX-NEXT: .Lcfi606:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi497:
+; NoVLX-NEXT: .Lcfi607:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -15112,11 +15748,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -15128,15 +15764,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15152,6 +15784,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -15176,12 +15809,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi498:
+; NoVLX-NEXT: .Lcfi608:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi499:
+; NoVLX-NEXT: .Lcfi609:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi500:
+; NoVLX-NEXT: .Lcfi610:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -15190,21 +15823,25 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi501:
+; NoVLX-NEXT: .Lcfi611:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi502:
+; NoVLX-NEXT: .Lcfi612:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi503:
+; NoVLX-NEXT: .Lcfi613:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi504:
+; NoVLX-NEXT: .Lcfi614:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi505:
+; NoVLX-NEXT: .Lcfi615:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -15247,11 +15884,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -15263,15 +15900,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -15287,6 +15920,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -15312,58 +15946,62 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi506:
+; NoVLX-NEXT: .Lcfi616:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi507:
+; NoVLX-NEXT: .Lcfi617:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi508:
+; NoVLX-NEXT: .Lcfi618:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
@@ -15371,194 +16009,190 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm3, %ymm0
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm8, %ymm1
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm2, %ymm2
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
+; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
@@ -15642,6 +16276,7 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -15663,68 +16298,69 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi509:
+; NoVLX-NEXT: .Lcfi619:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi510:
+; NoVLX-NEXT: .Lcfi620:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi511:
+; NoVLX-NEXT: .Lcfi621:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -15732,8 +16368,7 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -15743,19 +16378,19 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1
; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
@@ -15908,6 +16543,7 @@ define zeroext i64 @test_vpcmpsgtw_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -15931,12 +16567,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi512:
+; NoVLX-NEXT: .Lcfi622:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi513:
+; NoVLX-NEXT: .Lcfi623:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi514:
+; NoVLX-NEXT: .Lcfi624:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -15947,12 +16583,17 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -15960,10 +16601,9 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm4
@@ -15981,40 +16621,39 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm5
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
@@ -16022,72 +16661,69 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm6
-; NoVLX-NEXT: vmovq %xmm6, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -16096,13 +16732,18 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm4
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -16111,152 +16752,147 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
-; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm1, %ymm2
-; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16270,6 +16906,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -16294,12 +16931,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi515:
+; NoVLX-NEXT: .Lcfi625:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi516:
+; NoVLX-NEXT: .Lcfi626:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi517:
+; NoVLX-NEXT: .Lcfi627:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -16311,6 +16948,8 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
@@ -16323,20 +16962,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -16344,7 +16982,6 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
@@ -16367,174 +17004,174 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
-; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3
-; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
-; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
-; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm2
+; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm2, %ymm2
+; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm1
-; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -16548,6 +17185,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -16589,8 +17227,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16607,6 +17245,7 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16644,8 +17283,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16662,6 +17301,7 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16684,6 +17324,7 @@ define zeroext i8 @test_masked_vpcmpsgtd
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -16695,14 +17336,13 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -16719,8 +17359,8 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16737,6 +17377,7 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16796,8 +17437,8 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16814,6 +17455,7 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16857,8 +17499,8 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16875,6 +17517,7 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16899,6 +17542,7 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -16910,14 +17554,13 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -16934,8 +17577,8 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -16952,6 +17595,7 @@ define zeroext i8 @test_masked_vpcmpsgtd
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -16994,8 +17638,8 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17012,6 +17656,7 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17048,8 +17693,8 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17066,6 +17711,7 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17088,6 +17734,7 @@ define zeroext i16 @test_masked_vpcmpsgt
;
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -17099,14 +17746,13 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -17122,8 +17768,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17140,6 +17786,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17198,8 +17845,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17216,6 +17863,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17258,8 +17906,8 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17276,6 +17924,7 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17300,6 +17949,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -17311,14 +17961,13 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -17334,8 +17983,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -17352,6 +18001,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17378,12 +18028,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi518:
+; NoVLX-NEXT: .Lcfi628:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi519:
+; NoVLX-NEXT: .Lcfi629:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi520:
+; NoVLX-NEXT: .Lcfi630:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -17401,6 +18051,7 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17421,12 +18072,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi521:
+; NoVLX-NEXT: .Lcfi631:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi522:
+; NoVLX-NEXT: .Lcfi632:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi523:
+; NoVLX-NEXT: .Lcfi633:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -17444,6 +18095,7 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17466,15 +18118,16 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi524:
+; NoVLX-NEXT: .Lcfi634:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi525:
+; NoVLX-NEXT: .Lcfi635:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi526:
+; NoVLX-NEXT: .Lcfi636:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -17486,14 +18139,13 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -17507,6 +18159,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17531,12 +18184,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi527:
+; NoVLX-NEXT: .Lcfi637:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi528:
+; NoVLX-NEXT: .Lcfi638:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi529:
+; NoVLX-NEXT: .Lcfi639:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -17572,6 +18225,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17597,12 +18251,12 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi530:
+; NoVLX-NEXT: .Lcfi640:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi531:
+; NoVLX-NEXT: .Lcfi641:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi532:
+; NoVLX-NEXT: .Lcfi642:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -17621,6 +18275,7 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17644,16 +18299,17 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi533:
+; NoVLX-NEXT: .Lcfi643:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi534:
+; NoVLX-NEXT: .Lcfi644:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi535:
+; NoVLX-NEXT: .Lcfi645:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -17665,14 +18321,13 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -17686,6 +18341,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17712,20 +18368,20 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi536:
+; NoVLX-NEXT: .Lcfi646:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi537:
+; NoVLX-NEXT: .Lcfi647:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi538:
+; NoVLX-NEXT: .Lcfi648:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -17741,6 +18397,7 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17761,20 +18418,20 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi539:
+; NoVLX-NEXT: .Lcfi649:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi540:
+; NoVLX-NEXT: .Lcfi650:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi541:
+; NoVLX-NEXT: .Lcfi651:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -17790,6 +18447,7 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17812,18 +18470,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi542:
+; NoVLX-NEXT: .Lcfi652:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi543:
+; NoVLX-NEXT: .Lcfi653:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi544:
+; NoVLX-NEXT: .Lcfi654:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -17836,14 +18495,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -17859,6 +18517,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17883,18 +18542,19 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi545:
+; NoVLX-NEXT: .Lcfi655:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi546:
+; NoVLX-NEXT: .Lcfi656:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi547:
+; NoVLX-NEXT: .Lcfi657:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -17907,14 +18567,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -17930,6 +18589,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -17955,12 +18615,12 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi548:
+; NoVLX-NEXT: .Lcfi658:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi549:
+; NoVLX-NEXT: .Lcfi659:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi550:
+; NoVLX-NEXT: .Lcfi660:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -17968,8 +18628,8 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -17985,6 +18645,7 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -18008,12 +18669,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi551:
+; NoVLX-NEXT: .Lcfi661:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi552:
+; NoVLX-NEXT: .Lcfi662:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi553:
+; NoVLX-NEXT: .Lcfi663:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -18021,6 +18682,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -18033,14 +18695,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -18056,6 +18717,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -18090,6 +18752,7 @@ define zeroext i16 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18118,6 +18781,7 @@ define zeroext i16 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18149,6 +18813,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18181,6 +18846,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18213,6 +18879,7 @@ define zeroext i16 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18245,6 +18912,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18271,12 +18939,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi554:
+; NoVLX-NEXT: .Lcfi664:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi555:
+; NoVLX-NEXT: .Lcfi665:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi556:
+; NoVLX-NEXT: .Lcfi666:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18325,6 +18993,7 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18346,12 +19015,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi557:
+; NoVLX-NEXT: .Lcfi667:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi558:
+; NoVLX-NEXT: .Lcfi668:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi559:
+; NoVLX-NEXT: .Lcfi669:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18400,6 +19069,7 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18423,12 +19093,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi560:
+; NoVLX-NEXT: .Lcfi670:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi561:
+; NoVLX-NEXT: .Lcfi671:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi562:
+; NoVLX-NEXT: .Lcfi672:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18478,6 +19148,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18502,12 +19173,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi563:
+; NoVLX-NEXT: .Lcfi673:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi564:
+; NoVLX-NEXT: .Lcfi674:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi565:
+; NoVLX-NEXT: .Lcfi675:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18557,6 +19228,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18582,12 +19254,12 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi566:
+; NoVLX-NEXT: .Lcfi676:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi567:
+; NoVLX-NEXT: .Lcfi677:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi568:
+; NoVLX-NEXT: .Lcfi678:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18636,6 +19308,7 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18660,12 +19333,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi569:
+; NoVLX-NEXT: .Lcfi679:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi570:
+; NoVLX-NEXT: .Lcfi680:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi571:
+; NoVLX-NEXT: .Lcfi681:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -18715,6 +19388,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18741,56 +19415,56 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi572:
+; NoVLX-NEXT: .Lcfi682:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi573:
+; NoVLX-NEXT: .Lcfi683:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi574:
+; NoVLX-NEXT: .Lcfi684:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
@@ -18800,6 +19474,7 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18821,55 +19496,55 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi575:
+; NoVLX-NEXT: .Lcfi685:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi576:
+; NoVLX-NEXT: .Lcfi686:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi577:
+; NoVLX-NEXT: .Lcfi687:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -18880,6 +19555,7 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18903,12 +19579,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi578:
+; NoVLX-NEXT: .Lcfi688:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi579:
+; NoVLX-NEXT: .Lcfi689:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi580:
+; NoVLX-NEXT: .Lcfi690:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -18916,43 +19592,43 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -18963,6 +19639,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -18987,12 +19664,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi581:
+; NoVLX-NEXT: .Lcfi691:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi582:
+; NoVLX-NEXT: .Lcfi692:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi583:
+; NoVLX-NEXT: .Lcfi693:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -19000,43 +19677,43 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -19047,6 +19724,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -19072,55 +19750,55 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi584:
+; NoVLX-NEXT: .Lcfi694:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi585:
+; NoVLX-NEXT: .Lcfi695:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi586:
+; NoVLX-NEXT: .Lcfi696:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -19131,6 +19809,7 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -19155,12 +19834,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi587:
+; NoVLX-NEXT: .Lcfi697:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi588:
+; NoVLX-NEXT: .Lcfi698:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi589:
+; NoVLX-NEXT: .Lcfi699:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -19168,43 +19847,43 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -19215,6 +19894,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -19241,78 +19921,93 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi590:
+; NoVLX-NEXT: .Lcfi700:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi591:
+; NoVLX-NEXT: .Lcfi701:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi592:
+; NoVLX-NEXT: .Lcfi702:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi703:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi704:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi705:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi706:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi707:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19321,8 +20016,14 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19344,78 +20045,93 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi593:
+; NoVLX-NEXT: .Lcfi708:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi594:
+; NoVLX-NEXT: .Lcfi709:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi595:
+; NoVLX-NEXT: .Lcfi710:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi711:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi712:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi713:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi714:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi715:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19424,8 +20140,14 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19449,79 +20171,94 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi596:
+; NoVLX-NEXT: .Lcfi716:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi597:
+; NoVLX-NEXT: .Lcfi717:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi598:
+; NoVLX-NEXT: .Lcfi718:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi719:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi720:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi721:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi722:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi723:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19530,8 +20267,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19556,79 +20299,94 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi599:
+; NoVLX-NEXT: .Lcfi724:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi600:
+; NoVLX-NEXT: .Lcfi725:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi601:
+; NoVLX-NEXT: .Lcfi726:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi727:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi728:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi729:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi730:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi731:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19637,8 +20395,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19664,78 +20428,93 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi602:
+; NoVLX-NEXT: .Lcfi732:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi603:
+; NoVLX-NEXT: .Lcfi733:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi604:
+; NoVLX-NEXT: .Lcfi734:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi735:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi736:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi737:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi738:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi739:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19744,8 +20523,14 @@ define zeroext i32 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19770,79 +20555,94 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi605:
+; NoVLX-NEXT: .Lcfi740:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi606:
+; NoVLX-NEXT: .Lcfi741:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi607:
+; NoVLX-NEXT: .Lcfi742:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi743:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi744:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi745:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi746:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi747:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -19851,8 +20651,14 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -19879,12 +20685,12 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi608:
+; NoVLX-NEXT: .Lcfi748:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi609:
+; NoVLX-NEXT: .Lcfi749:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi610:
+; NoVLX-NEXT: .Lcfi750:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -19893,17 +20699,21 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi611:
+; NoVLX-NEXT: .Lcfi751:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi612:
+; NoVLX-NEXT: .Lcfi752:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi613:
+; NoVLX-NEXT: .Lcfi753:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi614:
+; NoVLX-NEXT: .Lcfi754:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi615:
+; NoVLX-NEXT: .Lcfi755:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -19946,11 +20756,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -19962,15 +20772,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -19986,6 +20792,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20007,12 +20814,12 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi616:
+; NoVLX-NEXT: .Lcfi756:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi617:
+; NoVLX-NEXT: .Lcfi757:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi618:
+; NoVLX-NEXT: .Lcfi758:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -20021,17 +20828,21 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi619:
+; NoVLX-NEXT: .Lcfi759:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi620:
+; NoVLX-NEXT: .Lcfi760:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi621:
+; NoVLX-NEXT: .Lcfi761:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi622:
+; NoVLX-NEXT: .Lcfi762:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi623:
+; NoVLX-NEXT: .Lcfi763:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -20074,11 +20885,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -20090,15 +20901,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -20114,6 +20921,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20137,12 +20945,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi624:
+; NoVLX-NEXT: .Lcfi764:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi625:
+; NoVLX-NEXT: .Lcfi765:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi626:
+; NoVLX-NEXT: .Lcfi766:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -20151,18 +20959,22 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi627:
+; NoVLX-NEXT: .Lcfi767:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi628:
+; NoVLX-NEXT: .Lcfi768:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi629:
+; NoVLX-NEXT: .Lcfi769:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi630:
+; NoVLX-NEXT: .Lcfi770:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi631:
+; NoVLX-NEXT: .Lcfi771:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -20205,11 +21017,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -20221,15 +21033,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -20245,6 +21053,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20269,12 +21078,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi632:
+; NoVLX-NEXT: .Lcfi772:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi633:
+; NoVLX-NEXT: .Lcfi773:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi634:
+; NoVLX-NEXT: .Lcfi774:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -20283,18 +21092,22 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi635:
+; NoVLX-NEXT: .Lcfi775:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi636:
+; NoVLX-NEXT: .Lcfi776:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi637:
+; NoVLX-NEXT: .Lcfi777:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi638:
+; NoVLX-NEXT: .Lcfi778:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi639:
+; NoVLX-NEXT: .Lcfi779:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -20337,11 +21150,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -20353,15 +21166,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -20377,6 +21186,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20402,12 +21212,12 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi640:
+; NoVLX-NEXT: .Lcfi780:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi641:
+; NoVLX-NEXT: .Lcfi781:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi642:
+; NoVLX-NEXT: .Lcfi782:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -20416,17 +21226,21 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi643:
+; NoVLX-NEXT: .Lcfi783:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi644:
+; NoVLX-NEXT: .Lcfi784:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi645:
+; NoVLX-NEXT: .Lcfi785:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi646:
+; NoVLX-NEXT: .Lcfi786:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi647:
+; NoVLX-NEXT: .Lcfi787:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -20469,11 +21283,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -20485,15 +21299,11 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -20509,6 +21319,7 @@ define zeroext i64 @test_vpcmpsgtd_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20533,12 +21344,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi648:
+; NoVLX-NEXT: .Lcfi788:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi649:
+; NoVLX-NEXT: .Lcfi789:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi650:
+; NoVLX-NEXT: .Lcfi790:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -20547,18 +21358,22 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi651:
+; NoVLX-NEXT: .Lcfi791:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi652:
+; NoVLX-NEXT: .Lcfi792:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi653:
+; NoVLX-NEXT: .Lcfi793:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi654:
+; NoVLX-NEXT: .Lcfi794:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi655:
+; NoVLX-NEXT: .Lcfi795:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -20601,11 +21416,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -20617,15 +21432,11 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -20641,6 +21452,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -20673,6 +21485,7 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20700,6 +21513,7 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20722,6 +21536,7 @@ define zeroext i4 @test_masked_vpcmpsgtq
;
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -20729,16 +21544,16 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20780,6 +21595,7 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20813,6 +21629,7 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20837,6 +21654,7 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -20844,16 +21662,16 @@ define zeroext i4 @test_masked_vpcmpsgtq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20899,6 +21717,7 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20938,6 +21757,7 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -20989,6 +21809,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21042,6 +21863,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21087,6 +21909,7 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21140,6 +21963,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21184,6 +22008,7 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21222,6 +22047,7 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21272,6 +22098,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21324,6 +22151,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21368,6 +22196,7 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21420,6 +22249,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21446,12 +22276,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi656:
+; NoVLX-NEXT: .Lcfi796:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi657:
+; NoVLX-NEXT: .Lcfi797:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi658:
+; NoVLX-NEXT: .Lcfi798:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -21469,6 +22299,7 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21489,12 +22320,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi659:
+; NoVLX-NEXT: .Lcfi799:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi660:
+; NoVLX-NEXT: .Lcfi800:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi661:
+; NoVLX-NEXT: .Lcfi801:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -21512,6 +22343,7 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21534,15 +22366,16 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi662:
+; NoVLX-NEXT: .Lcfi802:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi663:
+; NoVLX-NEXT: .Lcfi803:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi664:
+; NoVLX-NEXT: .Lcfi804:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -21550,10 +22383,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -21567,6 +22399,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21591,12 +22424,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi665:
+; NoVLX-NEXT: .Lcfi805:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi666:
+; NoVLX-NEXT: .Lcfi806:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi667:
+; NoVLX-NEXT: .Lcfi807:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -21624,6 +22457,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21649,12 +22483,12 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi668:
+; NoVLX-NEXT: .Lcfi808:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi669:
+; NoVLX-NEXT: .Lcfi809:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi670:
+; NoVLX-NEXT: .Lcfi810:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -21673,6 +22507,7 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21696,16 +22531,17 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi671:
+; NoVLX-NEXT: .Lcfi811:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi672:
+; NoVLX-NEXT: .Lcfi812:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi673:
+; NoVLX-NEXT: .Lcfi813:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -21713,10 +22549,9 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
-; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -21730,6 +22565,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21756,20 +22592,20 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi674:
+; NoVLX-NEXT: .Lcfi814:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi675:
+; NoVLX-NEXT: .Lcfi815:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi676:
+; NoVLX-NEXT: .Lcfi816:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -21785,6 +22621,7 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21805,20 +22642,20 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi677:
+; NoVLX-NEXT: .Lcfi817:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi678:
+; NoVLX-NEXT: .Lcfi818:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi679:
+; NoVLX-NEXT: .Lcfi819:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -21834,6 +22671,7 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21856,12 +22694,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi680:
+; NoVLX-NEXT: .Lcfi820:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi681:
+; NoVLX-NEXT: .Lcfi821:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi682:
+; NoVLX-NEXT: .Lcfi822:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -21878,8 +22716,8 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -21895,6 +22733,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21919,12 +22758,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi683:
+; NoVLX-NEXT: .Lcfi823:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi684:
+; NoVLX-NEXT: .Lcfi824:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi685:
+; NoVLX-NEXT: .Lcfi825:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -21941,8 +22780,8 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -21958,6 +22797,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -21983,12 +22823,12 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi686:
+; NoVLX-NEXT: .Lcfi826:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi687:
+; NoVLX-NEXT: .Lcfi827:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi688:
+; NoVLX-NEXT: .Lcfi828:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -21996,8 +22836,8 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -22013,6 +22853,7 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -22036,12 +22877,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi689:
+; NoVLX-NEXT: .Lcfi829:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi690:
+; NoVLX-NEXT: .Lcfi830:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi691:
+; NoVLX-NEXT: .Lcfi831:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -22059,8 +22900,8 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -22076,6 +22917,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -22121,8 +22963,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22139,6 +22981,7 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22178,8 +23021,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22196,6 +23039,7 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22220,6 +23064,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22237,7 +23082,6 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22255,8 +23099,8 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22273,6 +23117,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22299,6 +23144,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22316,7 +23162,6 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22334,8 +23179,8 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22352,6 +23197,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22397,8 +23243,8 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22415,6 +23261,7 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22441,6 +23288,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22458,7 +23306,6 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22476,8 +23323,8 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -22494,6 +23341,7 @@ define zeroext i8 @test_masked_vpcmpsgtq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22538,8 +23386,8 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22556,6 +23404,7 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22594,8 +23443,8 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22612,6 +23461,7 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22636,6 +23486,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22653,7 +23504,6 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22670,8 +23520,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22688,6 +23538,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22714,6 +23565,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22731,7 +23583,6 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22748,8 +23599,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22766,6 +23617,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22810,8 +23662,8 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22828,6 +23680,7 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22854,6 +23707,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -22871,7 +23725,6 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -22888,8 +23741,8 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -22906,6 +23759,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22933,12 +23787,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi692:
+; NoVLX-NEXT: .Lcfi832:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi693:
+; NoVLX-NEXT: .Lcfi833:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi694:
+; NoVLX-NEXT: .Lcfi834:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -22957,6 +23811,7 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -22978,12 +23833,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi695:
+; NoVLX-NEXT: .Lcfi835:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi696:
+; NoVLX-NEXT: .Lcfi836:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi697:
+; NoVLX-NEXT: .Lcfi837:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -23002,6 +23857,7 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23025,16 +23881,17 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi698:
+; NoVLX-NEXT: .Lcfi838:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi699:
+; NoVLX-NEXT: .Lcfi839:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi700:
+; NoVLX-NEXT: .Lcfi840:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -23052,7 +23909,6 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -23067,6 +23923,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23092,16 +23949,17 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi701:
+; NoVLX-NEXT: .Lcfi841:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi702:
+; NoVLX-NEXT: .Lcfi842:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi703:
+; NoVLX-NEXT: .Lcfi843:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -23119,7 +23977,6 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -23134,6 +23991,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23160,12 +24018,12 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi704:
+; NoVLX-NEXT: .Lcfi844:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi705:
+; NoVLX-NEXT: .Lcfi845:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi706:
+; NoVLX-NEXT: .Lcfi846:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -23185,6 +24043,7 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23209,17 +24068,18 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi707:
+; NoVLX-NEXT: .Lcfi847:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi708:
+; NoVLX-NEXT: .Lcfi848:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi709:
+; NoVLX-NEXT: .Lcfi849:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -23237,7 +24097,6 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -23252,6 +24111,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23279,12 +24139,12 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi710:
+; NoVLX-NEXT: .Lcfi850:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi711:
+; NoVLX-NEXT: .Lcfi851:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi712:
+; NoVLX-NEXT: .Lcfi852:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23292,8 +24152,8 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23309,6 +24169,7 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23330,12 +24191,12 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi713:
+; NoVLX-NEXT: .Lcfi853:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi714:
+; NoVLX-NEXT: .Lcfi854:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi715:
+; NoVLX-NEXT: .Lcfi855:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23343,8 +24204,8 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23360,6 +24221,7 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23383,12 +24245,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi716:
+; NoVLX-NEXT: .Lcfi856:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi717:
+; NoVLX-NEXT: .Lcfi857:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi718:
+; NoVLX-NEXT: .Lcfi858:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23396,6 +24258,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -23408,14 +24271,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23431,6 +24293,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23456,12 +24319,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi719:
+; NoVLX-NEXT: .Lcfi859:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi720:
+; NoVLX-NEXT: .Lcfi860:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi721:
+; NoVLX-NEXT: .Lcfi861:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23469,6 +24332,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -23481,14 +24345,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23504,6 +24367,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23530,12 +24394,12 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi722:
+; NoVLX-NEXT: .Lcfi862:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi723:
+; NoVLX-NEXT: .Lcfi863:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi724:
+; NoVLX-NEXT: .Lcfi864:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23544,8 +24408,8 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23561,6 +24425,7 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23585,12 +24450,12 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi725:
+; NoVLX-NEXT: .Lcfi865:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi726:
+; NoVLX-NEXT: .Lcfi866:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi727:
+; NoVLX-NEXT: .Lcfi867:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -23599,6 +24464,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -23611,14 +24477,13 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -23634,6 +24499,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -23664,6 +24530,7 @@ define zeroext i16 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23688,6 +24555,7 @@ define zeroext i16 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23715,6 +24583,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23743,6 +24612,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23771,6 +24641,7 @@ define zeroext i16 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23799,6 +24670,7 @@ define zeroext i16 @test_masked_vpcmpsgt
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23825,12 +24697,12 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi728:
+; NoVLX-NEXT: .Lcfi868:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi729:
+; NoVLX-NEXT: .Lcfi869:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi730:
+; NoVLX-NEXT: .Lcfi870:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -23877,6 +24749,7 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23898,12 +24771,12 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi731:
+; NoVLX-NEXT: .Lcfi871:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi732:
+; NoVLX-NEXT: .Lcfi872:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi733:
+; NoVLX-NEXT: .Lcfi873:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -23950,6 +24823,7 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -23973,12 +24847,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi734:
+; NoVLX-NEXT: .Lcfi874:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi735:
+; NoVLX-NEXT: .Lcfi875:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi736:
+; NoVLX-NEXT: .Lcfi876:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -24026,6 +24900,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24050,12 +24925,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi737:
+; NoVLX-NEXT: .Lcfi877:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi738:
+; NoVLX-NEXT: .Lcfi878:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi739:
+; NoVLX-NEXT: .Lcfi879:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -24103,6 +24978,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24128,12 +25004,12 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi740:
+; NoVLX-NEXT: .Lcfi880:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi741:
+; NoVLX-NEXT: .Lcfi881:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi742:
+; NoVLX-NEXT: .Lcfi882:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -24180,6 +25056,7 @@ define zeroext i32 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24204,12 +25081,12 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi743:
+; NoVLX-NEXT: .Lcfi883:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi744:
+; NoVLX-NEXT: .Lcfi884:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi745:
+; NoVLX-NEXT: .Lcfi885:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -24257,6 +25134,7 @@ define zeroext i32 @test_masked_vpcmpsgt
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24283,53 +25161,53 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi746:
+; NoVLX-NEXT: .Lcfi886:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi747:
+; NoVLX-NEXT: .Lcfi887:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi748:
+; NoVLX-NEXT: .Lcfi888:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24340,6 +25218,7 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24361,53 +25240,53 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi749:
+; NoVLX-NEXT: .Lcfi889:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi750:
+; NoVLX-NEXT: .Lcfi890:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi751:
+; NoVLX-NEXT: .Lcfi891:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24418,6 +25297,7 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24441,54 +25321,54 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi752:
+; NoVLX-NEXT: .Lcfi892:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi753:
+; NoVLX-NEXT: .Lcfi893:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi754:
+; NoVLX-NEXT: .Lcfi894:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24499,6 +25379,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24523,54 +25404,54 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi755:
+; NoVLX-NEXT: .Lcfi895:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi756:
+; NoVLX-NEXT: .Lcfi896:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi757:
+; NoVLX-NEXT: .Lcfi897:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24581,6 +25462,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24606,53 +25488,53 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi758:
+; NoVLX-NEXT: .Lcfi898:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi759:
+; NoVLX-NEXT: .Lcfi899:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi760:
+; NoVLX-NEXT: .Lcfi900:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24663,6 +25545,7 @@ define zeroext i64 @test_vpcmpsgtq_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24687,54 +25570,54 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi761:
+; NoVLX-NEXT: .Lcfi901:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi762:
+; NoVLX-NEXT: .Lcfi902:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi763:
+; NoVLX-NEXT: .Lcfi903:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -24745,6 +25628,7 @@ define zeroext i64 @test_masked_vpcmpsgt
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <8 x i64>
@@ -24770,15 +25654,30 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi764:
+; NoVLX-NEXT: .Lcfi904:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi765:
+; NoVLX-NEXT: .Lcfi905:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi766:
+; NoVLX-NEXT: .Lcfi906:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi907:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi908:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi909:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi910:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi911:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -24789,64 +25688,64 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -24855,8 +25754,14 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -24877,15 +25782,30 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi767:
+; NoVLX-NEXT: .Lcfi912:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi768:
+; NoVLX-NEXT: .Lcfi913:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi769:
+; NoVLX-NEXT: .Lcfi914:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi915:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi916:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi917:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi918:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi919:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -24897,64 +25817,64 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -24963,8 +25883,14 @@ define zeroext i32 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -24987,15 +25913,30 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi770:
+; NoVLX-NEXT: .Lcfi920:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi771:
+; NoVLX-NEXT: .Lcfi921:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi772:
+; NoVLX-NEXT: .Lcfi922:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi923:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi924:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi925:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi926:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi927:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
@@ -25007,64 +25948,64 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -25073,8 +26014,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25098,15 +26045,30 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi773:
+; NoVLX-NEXT: .Lcfi928:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi774:
+; NoVLX-NEXT: .Lcfi929:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi775:
+; NoVLX-NEXT: .Lcfi930:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi931:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi932:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi933:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi934:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi935:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -25119,64 +26081,64 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -25185,8 +26147,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25211,12 +26179,12 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi776:
+; NoVLX-NEXT: .Lcfi936:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi777:
+; NoVLX-NEXT: .Lcfi937:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi778:
+; NoVLX-NEXT: .Lcfi938:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -25225,15 +26193,15 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi779:
+; NoVLX-NEXT: .Lcfi939:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi780:
+; NoVLX-NEXT: .Lcfi940:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi781:
+; NoVLX-NEXT: .Lcfi941:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi782:
+; NoVLX-NEXT: .Lcfi942:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi783:
+; NoVLX-NEXT: .Lcfi943:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -25241,6 +26209,10 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -25283,11 +26255,11 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -25299,15 +26271,11 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -25323,6 +26291,7 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25343,12 +26312,12 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi784:
+; NoVLX-NEXT: .Lcfi944:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi785:
+; NoVLX-NEXT: .Lcfi945:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi786:
+; NoVLX-NEXT: .Lcfi946:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -25357,15 +26326,15 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi787:
+; NoVLX-NEXT: .Lcfi947:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi788:
+; NoVLX-NEXT: .Lcfi948:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi789:
+; NoVLX-NEXT: .Lcfi949:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi790:
+; NoVLX-NEXT: .Lcfi950:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi791:
+; NoVLX-NEXT: .Lcfi951:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
@@ -25374,6 +26343,10 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -25416,11 +26389,11 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -25432,15 +26405,11 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -25456,6 +26425,7 @@ define zeroext i64 @test_vpcmpsgeb_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25478,12 +26448,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi792:
+; NoVLX-NEXT: .Lcfi952:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi793:
+; NoVLX-NEXT: .Lcfi953:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi794:
+; NoVLX-NEXT: .Lcfi954:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -25492,15 +26462,15 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi795:
+; NoVLX-NEXT: .Lcfi955:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi796:
+; NoVLX-NEXT: .Lcfi956:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi797:
+; NoVLX-NEXT: .Lcfi957:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi798:
+; NoVLX-NEXT: .Lcfi958:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi799:
+; NoVLX-NEXT: .Lcfi959:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
@@ -25509,6 +26479,10 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -25551,11 +26525,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -25567,15 +26541,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -25591,6 +26561,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25614,12 +26585,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi800:
+; NoVLX-NEXT: .Lcfi960:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi801:
+; NoVLX-NEXT: .Lcfi961:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi802:
+; NoVLX-NEXT: .Lcfi962:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -25628,15 +26599,15 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi803:
+; NoVLX-NEXT: .Lcfi963:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi804:
+; NoVLX-NEXT: .Lcfi964:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi805:
+; NoVLX-NEXT: .Lcfi965:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi806:
+; NoVLX-NEXT: .Lcfi966:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi807:
+; NoVLX-NEXT: .Lcfi967:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
@@ -25646,6 +26617,10 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -25688,11 +26663,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -25704,15 +26679,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -25728,6 +26699,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <16 x i8>
@@ -25753,12 +26725,12 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi808:
+; NoVLX-NEXT: .Lcfi968:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi809:
+; NoVLX-NEXT: .Lcfi969:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi810:
+; NoVLX-NEXT: .Lcfi970:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -25783,6 +26755,7 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -25804,12 +26777,12 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi811:
+; NoVLX-NEXT: .Lcfi971:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi812:
+; NoVLX-NEXT: .Lcfi972:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi813:
+; NoVLX-NEXT: .Lcfi973:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -25835,6 +26808,7 @@ define zeroext i64 @test_vpcmpsgeb_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -25858,12 +26832,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi814:
+; NoVLX-NEXT: .Lcfi974:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi815:
+; NoVLX-NEXT: .Lcfi975:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi816:
+; NoVLX-NEXT: .Lcfi976:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -25897,6 +26871,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -25921,12 +26896,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi817:
+; NoVLX-NEXT: .Lcfi977:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi818:
+; NoVLX-NEXT: .Lcfi978:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi819:
+; NoVLX-NEXT: .Lcfi979:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -25961,6 +26936,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <32 x i8>
@@ -25995,6 +26971,7 @@ define zeroext i16 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26026,6 +27003,7 @@ define zeroext i16 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26059,6 +27037,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26094,6 +27073,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kunpckbw %k0, %k1, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26118,12 +27098,12 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi820:
+; NoVLX-NEXT: .Lcfi980:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi821:
+; NoVLX-NEXT: .Lcfi981:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi822:
+; NoVLX-NEXT: .Lcfi982:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -26175,6 +27155,7 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26195,12 +27176,12 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi823:
+; NoVLX-NEXT: .Lcfi983:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi824:
+; NoVLX-NEXT: .Lcfi984:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi825:
+; NoVLX-NEXT: .Lcfi985:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -26253,6 +27234,7 @@ define zeroext i32 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26275,12 +27257,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi826:
+; NoVLX-NEXT: .Lcfi986:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi827:
+; NoVLX-NEXT: .Lcfi987:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi828:
+; NoVLX-NEXT: .Lcfi988:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -26333,6 +27315,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26356,12 +27339,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi829:
+; NoVLX-NEXT: .Lcfi989:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi830:
+; NoVLX-NEXT: .Lcfi990:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi831:
+; NoVLX-NEXT: .Lcfi991:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -26415,6 +27398,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26439,12 +27423,12 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi832:
+; NoVLX-NEXT: .Lcfi992:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi833:
+; NoVLX-NEXT: .Lcfi993:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi834:
+; NoVLX-NEXT: .Lcfi994:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -26454,43 +27438,43 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -26501,6 +27485,7 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26521,12 +27506,12 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi835:
+; NoVLX-NEXT: .Lcfi995:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi836:
+; NoVLX-NEXT: .Lcfi996:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi837:
+; NoVLX-NEXT: .Lcfi997:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -26537,43 +27522,43 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -26584,6 +27569,7 @@ define zeroext i64 @test_vpcmpsgew_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26606,12 +27592,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi838:
+; NoVLX-NEXT: .Lcfi998:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi839:
+; NoVLX-NEXT: .Lcfi999:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi840:
+; NoVLX-NEXT: .Lcfi1000:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -26622,43 +27608,43 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -26669,6 +27655,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26692,12 +27679,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi841:
+; NoVLX-NEXT: .Lcfi1001:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi842:
+; NoVLX-NEXT: .Lcfi1002:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi843:
+; NoVLX-NEXT: .Lcfi1003:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -26709,43 +27696,43 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -26756,6 +27743,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <8 x i16>
@@ -26781,15 +27769,30 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi844:
+; NoVLX-NEXT: .Lcfi1004:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi845:
+; NoVLX-NEXT: .Lcfi1005:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi846:
+; NoVLX-NEXT: .Lcfi1006:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1007:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1008:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1009:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1010:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1011:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
@@ -26800,64 +27803,64 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -26866,8 +27869,14 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -26889,15 +27898,30 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi847:
+; NoVLX-NEXT: .Lcfi1012:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi848:
+; NoVLX-NEXT: .Lcfi1013:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi849:
+; NoVLX-NEXT: .Lcfi1014:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1015:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1016:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1017:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1018:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1019:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -26909,64 +27933,64 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -26975,8 +27999,14 @@ define zeroext i32 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27000,15 +28030,30 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi850:
+; NoVLX-NEXT: .Lcfi1020:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi851:
+; NoVLX-NEXT: .Lcfi1021:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi852:
+; NoVLX-NEXT: .Lcfi1022:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1023:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1024:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1025:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1026:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1027:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
@@ -27020,64 +28065,64 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -27086,8 +28131,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27112,15 +28163,30 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi853:
+; NoVLX-NEXT: .Lcfi1028:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi854:
+; NoVLX-NEXT: .Lcfi1029:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi855:
+; NoVLX-NEXT: .Lcfi1030:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1031:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1032:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1033:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1034:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1035:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -27133,64 +28199,64 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -27199,8 +28265,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27226,12 +28298,12 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi856:
+; NoVLX-NEXT: .Lcfi1036:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi857:
+; NoVLX-NEXT: .Lcfi1037:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi858:
+; NoVLX-NEXT: .Lcfi1038:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -27240,15 +28312,15 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi859:
+; NoVLX-NEXT: .Lcfi1039:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi860:
+; NoVLX-NEXT: .Lcfi1040:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi861:
+; NoVLX-NEXT: .Lcfi1041:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi862:
+; NoVLX-NEXT: .Lcfi1042:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi863:
+; NoVLX-NEXT: .Lcfi1043:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -27256,6 +28328,10 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -27298,11 +28374,11 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -27314,15 +28390,11 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27338,6 +28410,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27359,12 +28432,12 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi864:
+; NoVLX-NEXT: .Lcfi1044:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi865:
+; NoVLX-NEXT: .Lcfi1045:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi866:
+; NoVLX-NEXT: .Lcfi1046:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -27373,15 +28446,15 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi867:
+; NoVLX-NEXT: .Lcfi1047:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi868:
+; NoVLX-NEXT: .Lcfi1048:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi869:
+; NoVLX-NEXT: .Lcfi1049:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi870:
+; NoVLX-NEXT: .Lcfi1050:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi871:
+; NoVLX-NEXT: .Lcfi1051:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
@@ -27390,6 +28463,10 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -27432,11 +28509,11 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -27448,15 +28525,11 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27472,6 +28545,7 @@ define zeroext i64 @test_vpcmpsgew_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27495,12 +28569,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi872:
+; NoVLX-NEXT: .Lcfi1052:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi873:
+; NoVLX-NEXT: .Lcfi1053:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi874:
+; NoVLX-NEXT: .Lcfi1054:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -27509,15 +28583,15 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi875:
+; NoVLX-NEXT: .Lcfi1055:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi876:
+; NoVLX-NEXT: .Lcfi1056:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi877:
+; NoVLX-NEXT: .Lcfi1057:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi878:
+; NoVLX-NEXT: .Lcfi1058:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi879:
+; NoVLX-NEXT: .Lcfi1059:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
@@ -27526,6 +28600,10 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -27568,11 +28646,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -27584,15 +28662,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27608,6 +28682,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27632,12 +28707,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi880:
+; NoVLX-NEXT: .Lcfi1060:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi881:
+; NoVLX-NEXT: .Lcfi1061:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi882:
+; NoVLX-NEXT: .Lcfi1062:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -27646,15 +28721,15 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi883:
+; NoVLX-NEXT: .Lcfi1063:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi884:
+; NoVLX-NEXT: .Lcfi1064:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi885:
+; NoVLX-NEXT: .Lcfi1065:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi886:
+; NoVLX-NEXT: .Lcfi1066:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi887:
+; NoVLX-NEXT: .Lcfi1067:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
@@ -27664,6 +28739,10 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -27706,11 +28785,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -27722,15 +28801,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -27746,6 +28821,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <16 x i16>
@@ -27771,58 +28847,62 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi888:
+; NoVLX-NEXT: .Lcfi1068:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi889:
+; NoVLX-NEXT: .Lcfi1069:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi890:
+; NoVLX-NEXT: .Lcfi1070:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vmovq %xmm3, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm3
+; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
-; NoVLX-NEXT: vmovq %xmm4, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm2, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm5
@@ -27830,82 +28910,79 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm5
-; NoVLX-NEXT: vmovq %xmm5, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm5
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm6
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm6, %xmm6
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm6, %xmm6
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm6, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
@@ -27913,7 +28990,7 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -27921,41 +28998,40 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm0, %ymm4, %ymm3
-; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm6, %ymm0
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm7, %xmm4
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm4, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
-; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2
+; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
+; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -28104,6 +29180,7 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -28125,68 +29202,69 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi891:
+; NoVLX-NEXT: .Lcfi1071:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi892:
+; NoVLX-NEXT: .Lcfi1072:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi893:
+; NoVLX-NEXT: .Lcfi1073:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
-; NoVLX-NEXT: vmovq %xmm1, %rax
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm2
+; NoVLX-NEXT: vmovq %xmm2, %rax
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: movq %rax, %rdx
-; NoVLX-NEXT: vmovd %eax, %xmm2
+; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1
+; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm4
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -28194,8 +29272,7 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm0
-; NoVLX-NEXT: vmovq %xmm0, %rcx
+; NoVLX-NEXT: vmovq %xmm1, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
@@ -28205,24 +29282,24 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
-; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm2
-; NoVLX-NEXT: vmovdqa (%rdi), %ymm0
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0
-; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm1
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm1, %ymm2
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
+; NoVLX-NEXT: vmovdqa (%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0
+; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2
; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
@@ -28375,6 +29452,7 @@ define zeroext i64 @test_vpcmpsgew_v32i1
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -28398,12 +29476,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi894:
+; NoVLX-NEXT: .Lcfi1074:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi895:
+; NoVLX-NEXT: .Lcfi1075:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi896:
+; NoVLX-NEXT: .Lcfi1076:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -28414,12 +29492,17 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rdx
; NoVLX-NEXT: vmovd %eax, %xmm3
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4
+; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm8
+; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5
+; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm6
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: shrq $32, %rdx
-; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -28427,61 +29510,59 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm4
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm4
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4
; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm6, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm6, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: vmovq %xmm0, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: vpextrq $1, %xmm0, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm0
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0
@@ -28489,8 +29570,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm7, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: movl %ecx, %eax
@@ -28500,30 +29580,29 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm3
-; NoVLX-NEXT: vmovq %xmm3, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
+; NoVLX-NEXT: vmovq %xmm5, %rcx
; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm8
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm7, %xmm7
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm3, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpextrq $1, %xmm5, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm7, %xmm3
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3
@@ -28531,18 +29610,47 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3
-; NoVLX-NEXT: vextracti128 $1, %ymm1, %xmm7
-; NoVLX-NEXT: vmovq %xmm7, %rcx
+; NoVLX-NEXT: vmovq %xmm8, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
+; NoVLX-NEXT: vmovd %ecx, %xmm5
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rcx, %rax
+; NoVLX-NEXT: shrq $32, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpextrq $1, %xmm8, %rax
+; NoVLX-NEXT: shrq $48, %rcx
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movl %eax, %ecx
+; NoVLX-NEXT: shrl $16, %ecx
+; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: movq %rax, %rcx
+; NoVLX-NEXT: shrq $32, %rcx
+; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5
+; NoVLX-NEXT: vmovq %xmm1, %rcx
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5
+; NoVLX-NEXT: movl %ecx, %eax
+; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vmovd %ecx, %xmm2
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpextrq $1, %xmm7, %rax
+; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
+; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1
+; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
+; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
+; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm3
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
; NoVLX-NEXT: shrq $48, %rcx
; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
@@ -28554,31 +29662,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vmovq %xmm1, %rax
-; NoVLX-NEXT: movl %eax, %ecx
-; NoVLX-NEXT: shrl $16, %ecx
-; NoVLX-NEXT: vmovd %eax, %xmm7
-; NoVLX-NEXT: vpinsrw $1, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: movq %rax, %rcx
-; NoVLX-NEXT: shrq $32, %rcx
-; NoVLX-NEXT: vpinsrw $2, %ecx, %xmm7, %xmm7
-; NoVLX-NEXT: vpextrq $1, %xmm1, %rcx
-; NoVLX-NEXT: vinserti128 $1, %xmm4, %ymm5, %ymm1
-; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4
-; NoVLX-NEXT: shrq $48, %rax
-; NoVLX-NEXT: vpinsrw $3, %eax, %xmm7, %xmm0
-; NoVLX-NEXT: movl %ecx, %eax
-; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: movq %rcx, %rax
-; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm2
-; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
+; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm2, %ymm2
; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
; NoVLX-NEXT: vpxor %ymm4, %ymm2, %ymm2
@@ -28645,83 +29729,77 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vinserti128 $1, %xmm8, %ymm3, %ymm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
@@ -28740,6 +29818,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -28764,12 +29843,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi897:
+; NoVLX-NEXT: .Lcfi1077:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi898:
+; NoVLX-NEXT: .Lcfi1078:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi899:
+; NoVLX-NEXT: .Lcfi1079:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $96, %rsp
@@ -28781,6 +29860,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vmovd %eax, %xmm2
; NoVLX-NEXT: shrl $16, %eax
; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
+; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4
; NoVLX-NEXT: shrq $32, %rdx
; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2
; NoVLX-NEXT: vpextrq $1, %xmm1, %rax
@@ -28793,20 +29874,19 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1
-; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
-; NoVLX-NEXT: vmovq %xmm2, %rcx
+; NoVLX-NEXT: vmovq %xmm4, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1
; NoVLX-NEXT: movl %ecx, %eax
; NoVLX-NEXT: shrl $16, %eax
-; NoVLX-NEXT: vmovd %ecx, %xmm3
-; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: movq %rcx, %rax
; NoVLX-NEXT: shrq $32, %rax
-; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3
-; NoVLX-NEXT: vpextrq $1, %xmm2, %rax
+; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpextrq $1, %xmm4, %rax
; NoVLX-NEXT: shrq $48, %rcx
-; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2
+; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2
; NoVLX-NEXT: movl %eax, %ecx
; NoVLX-NEXT: shrl $16, %ecx
; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2
@@ -28814,7 +29894,6 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: movq %rax, %rcx
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm3
; NoVLX-NEXT: vmovq %xmm3, %rcx
; NoVLX-NEXT: shrq $48, %rax
; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2
@@ -28856,160 +29935,160 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: shrq $32, %rcx
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm4
; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
+; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2
; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
-; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
; NoVLX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm2
-; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm1
-; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1
-; NoVLX-NEXT: vmovdqa (%rsi), %ymm3
-; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1
-; NoVLX-NEXT: vpcmpeqd %ymm3, %ymm3, %ymm3
-; NoVLX-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1
-; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
-; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
-; NoVLX-NEXT: kshiftlw $15, %k0, %k1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z}
+; NoVLX-NEXT: vpmovdb %zmm1, %xmm1
+; NoVLX-NEXT: shrq $48, %rax
+; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4
+; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm4, %ymm3
+; NoVLX-NEXT: vmovdqa (%rsi), %ymm4
+; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm5
+; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm3
+; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm3
+; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
+; NoVLX-NEXT: vpxor %ymm4, %ymm5, %ymm2
+; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
+; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
+; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %eax, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: kshiftlw $15, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vmovd %ecx, %xmm2
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm4
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm4, %ymm2
-; NoVLX-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpxor %ymm4, %ymm3, %ymm3
+; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3
+; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3
+; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm3
+; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1
-; NoVLX-NEXT: vpmovdb %zmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm2, %xmm2
-; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k1} {z}
+; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovdb %zmm3, %xmm3
-; NoVLX-NEXT: vpand %xmm3, %xmm2, %xmm2
-; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2
-; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2
-; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3
+; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1
+; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
+; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
+; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
+; NoVLX-NEXT: vpand %xmm0, %xmm2, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -29023,6 +30102,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <32 x i16>
@@ -29066,8 +30146,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29084,6 +30164,7 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29124,8 +30205,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29142,6 +30223,7 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29164,6 +30246,7 @@ define zeroext i8 @test_masked_vpcmpsged
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29175,14 +30258,13 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29199,8 +30281,8 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29217,6 +30299,7 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29242,6 +30325,7 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29253,14 +30337,13 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29277,8 +30360,8 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29295,6 +30378,7 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29341,8 +30425,8 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29359,6 +30443,7 @@ define zeroext i8 @test_vpcmpsged_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29384,6 +30469,7 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29395,14 +30481,13 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29419,8 +30504,8 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -29437,6 +30522,7 @@ define zeroext i8 @test_masked_vpcmpsged
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29481,8 +30567,8 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29499,6 +30585,7 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29538,8 +30625,8 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29556,6 +30643,7 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29578,6 +30666,7 @@ define zeroext i16 @test_masked_vpcmpsge
;
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29589,14 +30678,13 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29612,8 +30700,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29630,6 +30718,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29655,6 +30744,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29666,14 +30756,13 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29689,8 +30778,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29707,6 +30796,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29752,8 +30842,8 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29770,6 +30860,7 @@ define zeroext i16 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29795,6 +30886,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29806,14 +30898,13 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
@@ -29829,8 +30920,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -29847,6 +30938,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29873,12 +30965,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi900:
+; NoVLX-NEXT: .Lcfi1080:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi901:
+; NoVLX-NEXT: .Lcfi1081:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi902:
+; NoVLX-NEXT: .Lcfi1082:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -29898,6 +30990,7 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29918,12 +31011,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi903:
+; NoVLX-NEXT: .Lcfi1083:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi904:
+; NoVLX-NEXT: .Lcfi1084:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi905:
+; NoVLX-NEXT: .Lcfi1085:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -29944,6 +31037,7 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -29966,15 +31060,16 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi906:
+; NoVLX-NEXT: .Lcfi1086:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi907:
+; NoVLX-NEXT: .Lcfi1087:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi908:
+; NoVLX-NEXT: .Lcfi1088:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -29986,14 +31081,13 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -30007,6 +31101,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30031,16 +31126,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi909:
+; NoVLX-NEXT: .Lcfi1089:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi910:
+; NoVLX-NEXT: .Lcfi1090:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi911:
+; NoVLX-NEXT: .Lcfi1091:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -30052,14 +31148,13 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -30073,6 +31168,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30099,12 +31195,12 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi912:
+; NoVLX-NEXT: .Lcfi1092:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi913:
+; NoVLX-NEXT: .Lcfi1093:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi914:
+; NoVLX-NEXT: .Lcfi1094:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -30125,6 +31221,7 @@ define zeroext i32 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30149,16 +31246,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi915:
+; NoVLX-NEXT: .Lcfi1095:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi916:
+; NoVLX-NEXT: .Lcfi1096:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi917:
+; NoVLX-NEXT: .Lcfi1097:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -30170,14 +31268,13 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k3, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
-; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -30191,6 +31288,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30217,12 +31315,12 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi918:
+; NoVLX-NEXT: .Lcfi1098:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi919:
+; NoVLX-NEXT: .Lcfi1099:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi920:
+; NoVLX-NEXT: .Lcfi1100:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -30231,8 +31329,8 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30248,6 +31346,7 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30268,12 +31367,12 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi921:
+; NoVLX-NEXT: .Lcfi1101:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi922:
+; NoVLX-NEXT: .Lcfi1102:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi923:
+; NoVLX-NEXT: .Lcfi1103:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -30283,8 +31382,8 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30300,6 +31399,7 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30322,18 +31422,19 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi924:
+; NoVLX-NEXT: .Lcfi1104:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi925:
+; NoVLX-NEXT: .Lcfi1105:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi926:
+; NoVLX-NEXT: .Lcfi1106:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -30346,14 +31447,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30369,6 +31469,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30393,12 +31494,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi927:
+; NoVLX-NEXT: .Lcfi1107:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi928:
+; NoVLX-NEXT: .Lcfi1108:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi929:
+; NoVLX-NEXT: .Lcfi1109:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -30406,6 +31507,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -30418,14 +31520,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30441,6 +31542,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30467,12 +31569,12 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi930:
+; NoVLX-NEXT: .Lcfi1110:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi931:
+; NoVLX-NEXT: .Lcfi1111:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi932:
+; NoVLX-NEXT: .Lcfi1112:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -30482,8 +31584,8 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30499,6 +31601,7 @@ define zeroext i64 @test_vpcmpsged_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30523,12 +31626,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi933:
+; NoVLX-NEXT: .Lcfi1113:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi934:
+; NoVLX-NEXT: .Lcfi1114:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi935:
+; NoVLX-NEXT: .Lcfi1115:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -30536,6 +31639,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -30548,14 +31652,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -30571,6 +31674,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <4 x i32>
@@ -30605,6 +31709,7 @@ define zeroext i16 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30633,6 +31738,7 @@ define zeroext i16 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30664,6 +31770,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30696,6 +31803,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30729,6 +31837,7 @@ define zeroext i16 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30762,6 +31871,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30788,12 +31898,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi936:
+; NoVLX-NEXT: .Lcfi1116:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi937:
+; NoVLX-NEXT: .Lcfi1117:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi938:
+; NoVLX-NEXT: .Lcfi1118:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -30842,6 +31952,7 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30863,12 +31974,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi939:
+; NoVLX-NEXT: .Lcfi1119:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi940:
+; NoVLX-NEXT: .Lcfi1120:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi941:
+; NoVLX-NEXT: .Lcfi1121:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -30917,6 +32028,7 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -30940,12 +32052,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi942:
+; NoVLX-NEXT: .Lcfi1122:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi943:
+; NoVLX-NEXT: .Lcfi1123:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi944:
+; NoVLX-NEXT: .Lcfi1124:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -30995,6 +32107,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31019,12 +32132,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi945:
+; NoVLX-NEXT: .Lcfi1125:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi946:
+; NoVLX-NEXT: .Lcfi1126:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi947:
+; NoVLX-NEXT: .Lcfi1127:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -31074,6 +32187,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31100,12 +32214,12 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi948:
+; NoVLX-NEXT: .Lcfi1128:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi949:
+; NoVLX-NEXT: .Lcfi1129:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi950:
+; NoVLX-NEXT: .Lcfi1130:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -31154,6 +32268,7 @@ define zeroext i32 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31179,12 +32294,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi951:
+; NoVLX-NEXT: .Lcfi1131:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi952:
+; NoVLX-NEXT: .Lcfi1132:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi953:
+; NoVLX-NEXT: .Lcfi1133:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -31234,6 +32349,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31260,55 +32376,55 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi954:
+; NoVLX-NEXT: .Lcfi1134:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi955:
+; NoVLX-NEXT: .Lcfi1135:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi956:
+; NoVLX-NEXT: .Lcfi1136:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM1<def> %YMM1<kill> %ZMM1<def>
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31319,6 +32435,7 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31340,55 +32457,55 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi957:
+; NoVLX-NEXT: .Lcfi1137:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi958:
+; NoVLX-NEXT: .Lcfi1138:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi959:
+; NoVLX-NEXT: .Lcfi1139:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vmovdqa (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31399,6 +32516,7 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31422,12 +32540,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi960:
+; NoVLX-NEXT: .Lcfi1140:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi961:
+; NoVLX-NEXT: .Lcfi1141:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi962:
+; NoVLX-NEXT: .Lcfi1142:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -31435,43 +32553,43 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31482,6 +32600,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31506,12 +32625,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi963:
+; NoVLX-NEXT: .Lcfi1143:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi964:
+; NoVLX-NEXT: .Lcfi1144:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi965:
+; NoVLX-NEXT: .Lcfi1145:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -31519,43 +32638,43 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vmovdqa (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31566,6 +32685,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31592,55 +32712,55 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi966:
+; NoVLX-NEXT: .Lcfi1146:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi967:
+; NoVLX-NEXT: .Lcfi1147:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi968:
+; NoVLX-NEXT: .Lcfi1148:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31651,6 +32771,7 @@ define zeroext i64 @test_vpcmpsged_v8i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31676,12 +32797,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi969:
+; NoVLX-NEXT: .Lcfi1149:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi970:
+; NoVLX-NEXT: .Lcfi1150:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi971:
+; NoVLX-NEXT: .Lcfi1151:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -31689,43 +32810,43 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
-; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $0, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
-; NoVLX-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, %ecx
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -31736,6 +32857,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <8 x i32>
@@ -31762,78 +32884,93 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi972:
+; NoVLX-NEXT: .Lcfi1152:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi973:
+; NoVLX-NEXT: .Lcfi1153:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi974:
+; NoVLX-NEXT: .Lcfi1154:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1155:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1156:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1157:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1158:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1159:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -31842,8 +32979,14 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -31865,88 +33008,109 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi975:
+; NoVLX-NEXT: .Lcfi1160:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi976:
+; NoVLX-NEXT: .Lcfi1161:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi977:
+; NoVLX-NEXT: .Lcfi1162:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1163:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1164:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1165:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1166:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1167:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
-; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
-; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT: kmovw %k0, (%rsp)
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
+; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
+; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
+; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
+; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
+; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -31970,79 +33134,94 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi978:
+; NoVLX-NEXT: .Lcfi1168:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi979:
+; NoVLX-NEXT: .Lcfi1169:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi980:
+; NoVLX-NEXT: .Lcfi1170:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1171:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1172:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1173:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1174:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1175:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -32051,8 +33230,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32077,79 +33262,94 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi981:
+; NoVLX-NEXT: .Lcfi1176:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi982:
+; NoVLX-NEXT: .Lcfi1177:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi983:
+; NoVLX-NEXT: .Lcfi1178:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1179:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1180:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1181:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1182:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1183:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -32158,8 +33358,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32186,79 +33392,94 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi984:
+; NoVLX-NEXT: .Lcfi1184:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi985:
+; NoVLX-NEXT: .Lcfi1185:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi986:
+; NoVLX-NEXT: .Lcfi1186:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1187:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1188:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1189:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1190:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1191:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
; NoVLX-NEXT: kxorw %k0, %k0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -32267,8 +33488,14 @@ define zeroext i32 @test_vpcmpsged_v16i1
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32294,15 +33521,30 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi987:
+; NoVLX-NEXT: .Lcfi1192:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi988:
+; NoVLX-NEXT: .Lcfi1193:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi989:
+; NoVLX-NEXT: .Lcfi1194:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
+; NoVLX-NEXT: pushq %r15
+; NoVLX-NEXT: pushq %r14
+; NoVLX-NEXT: pushq %r13
+; NoVLX-NEXT: pushq %r12
+; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: .Lcfi1195:
+; NoVLX-NEXT: .cfi_offset %rbx, -56
+; NoVLX-NEXT: .Lcfi1196:
+; NoVLX-NEXT: .cfi_offset %r12, -48
+; NoVLX-NEXT: .Lcfi1197:
+; NoVLX-NEXT: .cfi_offset %r13, -40
+; NoVLX-NEXT: .Lcfi1198:
+; NoVLX-NEXT: .cfi_offset %r14, -32
+; NoVLX-NEXT: .Lcfi1199:
+; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
@@ -32310,64 +33552,64 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: vmovd %ecx, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
-; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r11d
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
-; NoVLX-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r14d
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
-; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r15d
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
-; NoVLX-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r12d
; NoVLX-NEXT: kshiftlw $8, %k0, %k1
-; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %r13d
; NoVLX-NEXT: kshiftlw $7, %k0, %k1
-; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $6, %k0, %k1
-; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: kmovw %k1, %ebx
; NoVLX-NEXT: kshiftlw $5, %k0, %k1
-; NoVLX-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $4, %k0, %k1
-; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $3, %k0, %k1
-; NoVLX-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
-; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
-; NoVLX-NEXT: kmovw %k1, %eax
+; NoVLX-NEXT: vmovd %r10d, %xmm0
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
-; NoVLX-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
+; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
-; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
@@ -32376,8 +33618,14 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl (%rsp), %eax
-; NoVLX-NEXT: movq %rbp, %rsp
+; NoVLX-NEXT: leaq -40(%rbp), %rsp
+; NoVLX-NEXT: popq %rbx
+; NoVLX-NEXT: popq %r12
+; NoVLX-NEXT: popq %r13
+; NoVLX-NEXT: popq %r14
+; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32404,12 +33652,12 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi990:
+; NoVLX-NEXT: .Lcfi1200:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi991:
+; NoVLX-NEXT: .Lcfi1201:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi992:
+; NoVLX-NEXT: .Lcfi1202:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -32418,17 +33666,21 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi993:
+; NoVLX-NEXT: .Lcfi1203:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi994:
+; NoVLX-NEXT: .Lcfi1204:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi995:
+; NoVLX-NEXT: .Lcfi1205:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi996:
+; NoVLX-NEXT: .Lcfi1206:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi997:
+; NoVLX-NEXT: .Lcfi1207:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -32471,11 +33723,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -32487,15 +33739,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -32511,6 +33759,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32532,12 +33781,12 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi998:
+; NoVLX-NEXT: .Lcfi1208:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi999:
+; NoVLX-NEXT: .Lcfi1209:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1000:
+; NoVLX-NEXT: .Lcfi1210:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -32546,17 +33795,21 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1001:
+; NoVLX-NEXT: .Lcfi1211:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1002:
+; NoVLX-NEXT: .Lcfi1212:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1003:
+; NoVLX-NEXT: .Lcfi1213:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1004:
+; NoVLX-NEXT: .Lcfi1214:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1005:
+; NoVLX-NEXT: .Lcfi1215:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -32599,11 +33852,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -32615,15 +33868,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -32639,6 +33888,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32662,12 +33912,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1006:
+; NoVLX-NEXT: .Lcfi1216:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1007:
+; NoVLX-NEXT: .Lcfi1217:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1008:
+; NoVLX-NEXT: .Lcfi1218:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -32676,18 +33926,22 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1009:
+; NoVLX-NEXT: .Lcfi1219:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1010:
+; NoVLX-NEXT: .Lcfi1220:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1011:
+; NoVLX-NEXT: .Lcfi1221:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1012:
+; NoVLX-NEXT: .Lcfi1222:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1013:
+; NoVLX-NEXT: .Lcfi1223:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -32730,11 +33984,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -32746,15 +34000,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -32770,6 +34020,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32794,12 +34045,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1014:
+; NoVLX-NEXT: .Lcfi1224:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1015:
+; NoVLX-NEXT: .Lcfi1225:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1016:
+; NoVLX-NEXT: .Lcfi1226:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -32808,18 +34059,22 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1017:
+; NoVLX-NEXT: .Lcfi1227:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1018:
+; NoVLX-NEXT: .Lcfi1228:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1019:
+; NoVLX-NEXT: .Lcfi1229:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1020:
+; NoVLX-NEXT: .Lcfi1230:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1021:
+; NoVLX-NEXT: .Lcfi1231:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -32862,11 +34117,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -32878,15 +34133,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -32902,6 +34153,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -32928,12 +34180,12 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1022:
+; NoVLX-NEXT: .Lcfi1232:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1023:
+; NoVLX-NEXT: .Lcfi1233:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1024:
+; NoVLX-NEXT: .Lcfi1234:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -32942,18 +34194,22 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1025:
+; NoVLX-NEXT: .Lcfi1235:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1026:
+; NoVLX-NEXT: .Lcfi1236:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1027:
+; NoVLX-NEXT: .Lcfi1237:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1028:
+; NoVLX-NEXT: .Lcfi1238:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1029:
+; NoVLX-NEXT: .Lcfi1239:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -32996,11 +34252,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -33012,15 +34268,11 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -33036,6 +34288,7 @@ define zeroext i64 @test_vpcmpsged_v16i1
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -33061,12 +34314,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1030:
+; NoVLX-NEXT: .Lcfi1240:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1031:
+; NoVLX-NEXT: .Lcfi1241:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1032:
+; NoVLX-NEXT: .Lcfi1242:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: pushq %r15
; NoVLX-NEXT: pushq %r14
@@ -33075,19 +34328,23 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: pushq %rbx
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
-; NoVLX-NEXT: .Lcfi1033:
+; NoVLX-NEXT: .Lcfi1243:
; NoVLX-NEXT: .cfi_offset %rbx, -56
-; NoVLX-NEXT: .Lcfi1034:
+; NoVLX-NEXT: .Lcfi1244:
; NoVLX-NEXT: .cfi_offset %r12, -48
-; NoVLX-NEXT: .Lcfi1035:
+; NoVLX-NEXT: .Lcfi1245:
; NoVLX-NEXT: .cfi_offset %r13, -40
-; NoVLX-NEXT: .Lcfi1036:
+; NoVLX-NEXT: .Lcfi1246:
; NoVLX-NEXT: .cfi_offset %r14, -32
-; NoVLX-NEXT: .Lcfi1037:
+; NoVLX-NEXT: .Lcfi1247:
; NoVLX-NEXT: .cfi_offset %r15, -24
; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1
; NoVLX-NEXT: kmovw %edi, %k1
; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1}
+; NoVLX-NEXT: kxorw %k0, %k0, %k1
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
@@ -33130,11 +34387,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftlw $2, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vmovd %r10d, %xmm0
-; NoVLX-NEXT: kmovw %k1, %ecx
+; NoVLX-NEXT: kmovw %k1, %r10d
; NoVLX-NEXT: kshiftlw $1, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k1, %r8d
+; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0
@@ -33146,15 +34403,11 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0
-; NoVLX-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
+; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0
+; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
-; NoVLX-NEXT: kxorw %k0, %k0, %k0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpinsrb $14, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
-; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
@@ -33170,6 +34423,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: popq %r14
; NoVLX-NEXT: popq %r15
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__a to <16 x i32>
@@ -33204,6 +34458,7 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33234,6 +34489,7 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33256,6 +34512,7 @@ define zeroext i4 @test_masked_vpcmpsgeq
;
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask:
; NoVLX: # BB#0: # %entry
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -33263,16 +34520,16 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33298,6 +34555,7 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -33305,16 +34563,16 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33351,6 +34609,7 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33376,6 +34635,7 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -33383,16 +34643,16 @@ define zeroext i4 @test_masked_vpcmpsgeq
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero
; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33440,6 +34700,7 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33482,6 +34743,7 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33533,6 +34795,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33587,6 +34850,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33635,6 +34899,7 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33689,6 +34954,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33735,6 +35001,7 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33776,6 +35043,7 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33826,6 +35094,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33879,6 +35148,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33926,6 +35196,7 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -33979,6 +35250,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34005,12 +35277,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1038:
+; NoVLX-NEXT: .Lcfi1248:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1039:
+; NoVLX-NEXT: .Lcfi1249:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1040:
+; NoVLX-NEXT: .Lcfi1250:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -34030,6 +35302,7 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34050,12 +35323,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1041:
+; NoVLX-NEXT: .Lcfi1251:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1042:
+; NoVLX-NEXT: .Lcfi1252:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1043:
+; NoVLX-NEXT: .Lcfi1253:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -34076,6 +35349,7 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34098,15 +35372,16 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1044:
+; NoVLX-NEXT: .Lcfi1254:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1045:
+; NoVLX-NEXT: .Lcfi1255:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1046:
+; NoVLX-NEXT: .Lcfi1256:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -34114,10 +35389,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -34131,6 +35405,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34155,16 +35430,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1047:
+; NoVLX-NEXT: .Lcfi1257:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1048:
+; NoVLX-NEXT: .Lcfi1258:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1049:
+; NoVLX-NEXT: .Lcfi1259:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vmovdqa (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -34172,10 +35448,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -34189,6 +35464,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34215,12 +35491,12 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1050:
+; NoVLX-NEXT: .Lcfi1260:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1051:
+; NoVLX-NEXT: .Lcfi1261:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1052:
+; NoVLX-NEXT: .Lcfi1262:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -34241,6 +35517,7 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34265,16 +35542,17 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1053:
+; NoVLX-NEXT: .Lcfi1263:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1054:
+; NoVLX-NEXT: .Lcfi1264:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1055:
+; NoVLX-NEXT: .Lcfi1265:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1
+; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -34282,10 +35560,9 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: kmovw %k1, %ecx
-; NoVLX-NEXT: vmovd %ecx, %xmm2
-; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2
-; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
-; NoVLX-NEXT: vpandn %xmm2, %xmm0, %xmm0
+; NoVLX-NEXT: vmovd %ecx, %xmm1
+; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
+; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
@@ -34299,6 +35576,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34325,12 +35603,12 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1056:
+; NoVLX-NEXT: .Lcfi1266:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1057:
+; NoVLX-NEXT: .Lcfi1267:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1058:
+; NoVLX-NEXT: .Lcfi1268:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34339,8 +35617,8 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34356,6 +35634,7 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34376,12 +35655,12 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1059:
+; NoVLX-NEXT: .Lcfi1269:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1060:
+; NoVLX-NEXT: .Lcfi1270:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1061:
+; NoVLX-NEXT: .Lcfi1271:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34391,8 +35670,8 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34408,6 +35687,7 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34430,12 +35710,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1062:
+; NoVLX-NEXT: .Lcfi1272:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1063:
+; NoVLX-NEXT: .Lcfi1273:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1064:
+; NoVLX-NEXT: .Lcfi1274:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34452,8 +35732,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34469,6 +35749,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34493,12 +35774,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1065:
+; NoVLX-NEXT: .Lcfi1275:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1066:
+; NoVLX-NEXT: .Lcfi1276:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1067:
+; NoVLX-NEXT: .Lcfi1277:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34516,8 +35797,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34533,6 +35814,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34559,12 +35841,12 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1068:
+; NoVLX-NEXT: .Lcfi1278:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1069:
+; NoVLX-NEXT: .Lcfi1279:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1070:
+; NoVLX-NEXT: .Lcfi1280:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34574,8 +35856,8 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34591,6 +35873,7 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34615,12 +35898,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1071:
+; NoVLX-NEXT: .Lcfi1281:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1072:
+; NoVLX-NEXT: .Lcfi1282:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1073:
+; NoVLX-NEXT: .Lcfi1283:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -34638,8 +35921,8 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -34655,6 +35938,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__a to <2 x i64>
@@ -34702,8 +35986,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -34720,6 +36004,7 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -34762,8 +36047,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -34780,6 +36065,7 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -34806,6 +36092,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -34823,7 +36110,6 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -34841,8 +36127,8 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -34859,6 +36145,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -34888,6 +36175,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -34905,7 +36193,6 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -34923,8 +36210,8 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -34941,6 +36228,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -34989,8 +36277,8 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -35007,6 +36295,7 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35036,6 +36325,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35053,7 +36343,6 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -35071,8 +36360,8 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
@@ -35089,6 +36378,7 @@ define zeroext i8 @test_masked_vpcmpsgeq
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35135,8 +36425,8 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35153,6 +36443,7 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35194,8 +36485,8 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35212,6 +36503,7 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35238,6 +36530,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35255,7 +36548,6 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -35272,8 +36564,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35290,6 +36582,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35319,6 +36612,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35336,7 +36630,6 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -35353,8 +36646,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35371,6 +36664,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35418,8 +36712,8 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35436,6 +36730,7 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35465,6 +36760,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35482,7 +36778,6 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpextrb $4, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
@@ -35499,8 +36794,8 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
+; NoVLX-NEXT: vpextrb $8, %xmm0, %eax
; NoVLX-NEXT: kmovw %eax, %k1
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
@@ -35517,6 +36812,7 @@ define zeroext i16 @test_masked_vpcmpsge
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35544,12 +36840,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1074:
+; NoVLX-NEXT: .Lcfi1284:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1075:
+; NoVLX-NEXT: .Lcfi1285:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1076:
+; NoVLX-NEXT: .Lcfi1286:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -35570,6 +36866,7 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35591,12 +36888,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1077:
+; NoVLX-NEXT: .Lcfi1287:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1078:
+; NoVLX-NEXT: .Lcfi1288:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1079:
+; NoVLX-NEXT: .Lcfi1289:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -35618,6 +36915,7 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35641,18 +36939,19 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1080:
+; NoVLX-NEXT: .Lcfi1290:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1081:
+; NoVLX-NEXT: .Lcfi1291:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1082:
+; NoVLX-NEXT: .Lcfi1292:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35670,7 +36969,6 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -35685,6 +36983,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35710,12 +37009,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1083:
+; NoVLX-NEXT: .Lcfi1293:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1084:
+; NoVLX-NEXT: .Lcfi1294:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1085:
+; NoVLX-NEXT: .Lcfi1295:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -35723,6 +37022,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35740,7 +37040,6 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -35755,6 +37054,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35782,12 +37082,12 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1086:
+; NoVLX-NEXT: .Lcfi1296:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1087:
+; NoVLX-NEXT: .Lcfi1297:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1088:
+; NoVLX-NEXT: .Lcfi1298:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -35809,6 +37109,7 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35834,12 +37135,12 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1089:
+; NoVLX-NEXT: .Lcfi1299:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1090:
+; NoVLX-NEXT: .Lcfi1300:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1091:
+; NoVLX-NEXT: .Lcfi1301:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $32, %rsp
@@ -35847,6 +37148,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0
; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
@@ -35864,7 +37166,6 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
@@ -35879,6 +37180,7 @@ define zeroext i32 @test_masked_vpcmpsge
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35906,12 +37208,12 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1092:
+; NoVLX-NEXT: .Lcfi1302:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1093:
+; NoVLX-NEXT: .Lcfi1303:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1094:
+; NoVLX-NEXT: .Lcfi1304:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -35921,8 +37223,8 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -35938,6 +37240,7 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -35959,12 +37262,12 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1095:
+; NoVLX-NEXT: .Lcfi1305:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1096:
+; NoVLX-NEXT: .Lcfi1306:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1097:
+; NoVLX-NEXT: .Lcfi1307:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -35975,8 +37278,8 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
-; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
+; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1
@@ -35992,6 +37295,7 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
+; NoVLX-NEXT: vzeroupper
; NoVLX-NEXT: retq
entry:
%0 = bitcast <4 x i64> %__a to <4 x i64>
@@ -36015,12 +37319,12 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
-; NoVLX-NEXT: .Lcfi1098:
+; NoVLX-NEXT: .Lcfi1308:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
-; NoVLX-NEXT: .Lcfi1099:
+; NoVLX-NEXT: .Lcfi1309:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
-; NoVLX-NEXT: .Lcfi1100:
+; NoVLX-NEXT: .Lcfi1310:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
@@ -36030,6 +37334,7 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpmovqd %zmm0, %ymm0
; NoVLX-NEXT: kmovw %edi, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
+; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kshiftlw $13, %k0, %k2
; NoVLX-NEXT: kshiftrw $15, %k2, %k2
; NoVLX-NEXT: kshiftlw $15, %k0, %k3
@@ -36042,14 +37347,13 @@ define zeroext i64 @test_masked_vpcmpsge
; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT: kmovw %k2, %eax
; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
-; NoVLX-NEXT: kshiftrw $15, %k1, %k0
-; NoVLX-NEXT: kmovw %k0, %eax
+; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT: kxorw %k0, %k0, %k0
; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp)
[... 14346 lines stripped ...]
More information about the llvm-commits
mailing list