[llvm] ba319ac - [X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 25 15:19:58 PDT 2020


Author: Craig Topper
Date: 2020-08-25T15:19:27-07:00
New Revision: ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69

URL: https://github.com/llvm/llvm-project/commit/ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69
DIFF: https://github.com/llvm/llvm-project/commit/ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69.diff

LOG: [X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.

KMOVWkr produces VK16, there's no reason to copy it to VK16 again.

Test changes are presumably because we were scheduling based on
the COPY that is no longer there.

Added: 
    

Modified: 
    llvm/lib/Target/X86/X86InstrAVX512.td
    llvm/test/CodeGen/X86/avx512-ext.ll
    llvm/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/test/CodeGen/X86/avx512-mask-op.ll
    llvm/test/CodeGen/X86/avx512-vec-cmp.ll
    llvm/test/CodeGen/X86/vec_saddo.ll
    llvm/test/CodeGen/X86/vec_smulo.ll
    llvm/test/CodeGen/X86/vec_ssubo.ll
    llvm/test/CodeGen/X86/vec_uaddo.ll
    llvm/test/CodeGen/X86/vec_umulo.ll
    llvm/test/CodeGen/X86/vec_usubo.ll

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f9582238d30ff..0514a3a3611cc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2967,10 +2967,9 @@ let Predicates = [HasAVX512] in {
 
   def : Pat<(insert_subvector (v16i1 immAllZerosV),
                               (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
-            (COPY_TO_REGCLASS
-             (KMOVWkr (AND32ri8
-                       (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
-                       (i32 1))), VK16)>;
+            (KMOVWkr (AND32ri8
+                      (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
+                      (i32 1)))>;
 }
 
 // Mask unary operation

diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 5ce111806a2cc..fc1ba049c6912 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1766,39 +1766,39 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
 define i16 @trunc_i32_to_i1(i32 %a) {
 ; KNL-LABEL: trunc_i32_to_i1:
 ; KNL:       # %bb.0:
-; KNL-NEXT:    movw $-4, %ax
-; KNL-NEXT:    kmovw %eax, %k0
-; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    andl $1, %edi
-; KNL-NEXT:    kmovw %edi, %k1
-; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %edi, %k0
+; KNL-NEXT:    movw $-4, %ax
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    kshiftrw $1, %k1, %k1
+; KNL-NEXT:    kshiftlw $1, %k1, %k1
+; KNL-NEXT:    korw %k0, %k1, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
 ; KNL-NEXT:    # kill: def $ax killed $ax killed $eax
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: trunc_i32_to_i1:
 ; SKX:       # %bb.0:
-; SKX-NEXT:    movw $-4, %ax
-; SKX-NEXT:    kmovd %eax, %k0
-; SKX-NEXT:    kshiftrw $1, %k0, %k0
-; SKX-NEXT:    kshiftlw $1, %k0, %k0
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k1
-; SKX-NEXT:    korw %k1, %k0, %k0
+; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    movw $-4, %ax
+; SKX-NEXT:    kmovd %eax, %k1
+; SKX-NEXT:    kshiftrw $1, %k1, %k1
+; SKX-NEXT:    kshiftlw $1, %k1, %k1
+; SKX-NEXT:    korw %k0, %k1, %k0
 ; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    # kill: def $ax killed $ax killed $eax
 ; SKX-NEXT:    retq
 ;
 ; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
 ; AVX512DQNOBW:       # %bb.0:
-; AVX512DQNOBW-NEXT:    movw $-4, %ax
-; AVX512DQNOBW-NEXT:    kmovw %eax, %k0
-; AVX512DQNOBW-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQNOBW-NEXT:    andl $1, %edi
-; AVX512DQNOBW-NEXT:    kmovw %edi, %k1
-; AVX512DQNOBW-NEXT:    korw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT:    kmovw %edi, %k0
+; AVX512DQNOBW-NEXT:    movw $-4, %ax
+; AVX512DQNOBW-NEXT:    kmovw %eax, %k1
+; AVX512DQNOBW-NEXT:    kshiftrw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    kshiftlw $1, %k1, %k1
+; AVX512DQNOBW-NEXT:    korw %k0, %k1, %k0
 ; AVX512DQNOBW-NEXT:    kmovw %k0, %eax
 ; AVX512DQNOBW-NEXT:    # kill: def $ax killed $ax killed $eax
 ; AVX512DQNOBW-NEXT:    retq

diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 41bdaf21baa38..fd722e1beb135 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -2181,32 +2181,32 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
 define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
 ; KNL-LABEL: test_concat_v2i1:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    movzwl (%rdi), %eax
-; KNL-NEXT:    movzwl 2(%rdi), %ecx
+; KNL-NEXT:    movzwl 2(%rdi), %eax
+; KNL-NEXT:    movzwl (%rdi), %ecx
 ; KNL-NEXT:    vmovd %ecx, %xmm0
 ; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0
 ; KNL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
 ; KNL-NEXT:    setb %cl
+; KNL-NEXT:    andl $1, %ecx
 ; KNL-NEXT:    kmovw %ecx, %k0
-; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    vmovd %eax, %xmm2
 ; KNL-NEXT:    vcvtph2ps %xmm2, %xmm2
 ; KNL-NEXT:    vucomiss %xmm1, %xmm2
 ; KNL-NEXT:    setb %al
-; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    korw %k0, %k1, %k0
+; KNL-NEXT:    kshiftlw $1, %k1, %k1
+; KNL-NEXT:    korw %k1, %k0, %k0
 ; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0
 ; KNL-NEXT:    seta %al
+; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k1
-; KNL-NEXT:    kshiftlw $1, %k1, %k1
 ; KNL-NEXT:    vucomiss %xmm1, %xmm2
 ; KNL-NEXT:    seta %al
-; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k2
-; KNL-NEXT:    korw %k1, %k2, %k1
+; KNL-NEXT:    kshiftlw $1, %k2, %k2
+; KNL-NEXT:    korw %k2, %k1, %k1
 ; KNL-NEXT:    kandw %k1, %k0, %k0
 ; KNL-NEXT:    kshiftrw $1, %k0, %k1
 ; KNL-NEXT:    kmovw %k1, %ecx

diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index e67b81581396d..67067e3fff27d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -5157,13 +5157,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
 ; KNL-LABEL: mask64_insert:
 ; KNL:       ## %bb.0:
 ; KNL-NEXT:    movq %rdi, %rax
-; KNL-NEXT:    movw $-4, %cx
-; KNL-NEXT:    kmovw %ecx, %k0
-; KNL-NEXT:    kshiftrw $1, %k0, %k0
-; KNL-NEXT:    kshiftlw $1, %k0, %k0
 ; KNL-NEXT:    andl $1, %esi
-; KNL-NEXT:    kmovw %esi, %k1
-; KNL-NEXT:    korw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %esi, %k0
+; KNL-NEXT:    movw $-4, %cx
+; KNL-NEXT:    kmovw %ecx, %k1
+; KNL-NEXT:    kshiftrw $1, %k1, %k1
+; KNL-NEXT:    kshiftlw $1, %k1, %k1
+; KNL-NEXT:    korw %k0, %k1, %k0
 ; KNL-NEXT:    kmovw %k0, (%rdi)
 ; KNL-NEXT:    movw $-3, 6(%rdi)
 ; KNL-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
@@ -5198,13 +5198,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
 ; AVX512DQ-LABEL: mask64_insert:
 ; AVX512DQ:       ## %bb.0:
 ; AVX512DQ-NEXT:    movq %rdi, %rax
-; AVX512DQ-NEXT:    movw $-4, %cx
-; AVX512DQ-NEXT:    kmovw %ecx, %k0
-; AVX512DQ-NEXT:    kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512DQ-NEXT:    andl $1, %esi
-; AVX512DQ-NEXT:    kmovw %esi, %k1
-; AVX512DQ-NEXT:    korw %k1, %k0, %k0
+; AVX512DQ-NEXT:    kmovw %esi, %k0
+; AVX512DQ-NEXT:    movw $-4, %cx
+; AVX512DQ-NEXT:    kmovw %ecx, %k1
+; AVX512DQ-NEXT:    kshiftrw $1, %k1, %k1
+; AVX512DQ-NEXT:    kshiftlw $1, %k1, %k1
+; AVX512DQ-NEXT:    korw %k0, %k1, %k0
 ; AVX512DQ-NEXT:    kmovw %k0, (%rdi)
 ; AVX512DQ-NEXT:    movw $-3, 6(%rdi)
 ; AVX512DQ-NEXT:    movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD

diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 719bd9f9d95f0..7dcae9a2d24ac 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1434,8 +1434,8 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
 define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
 ; KNL-LABEL: half_vec_compare:
 ; KNL:       ## %bb.0: ## %entry
-; KNL-NEXT:    movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
-; KNL-NEXT:    movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
+; KNL-NEXT:    movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
+; KNL-NEXT:    movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
 ; KNL-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; KNL-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@@ -1443,17 +1443,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
 ; KNL-NEXT:    setp %cl ## encoding: [0x0f,0x9a,0xc1]
 ; KNL-NEXT:    setne %dl ## encoding: [0x0f,0x95,0xc2]
 ; KNL-NEXT:    orb %cl, %dl ## encoding: [0x08,0xca]
+; KNL-NEXT:    andl $1, %edx ## encoding: [0x83,0xe2,0x01]
 ; KNL-NEXT:    kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
-; KNL-NEXT:    kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
 ; KNL-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; KNL-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; KNL-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
 ; KNL-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
 ; KNL-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
 ; KNL-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
-; KNL-NEXT:    andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
 ; KNL-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
-; KNL-NEXT:    korw %k0, %k1, %k1 ## encoding: [0xc5,0xf4,0x45,0xc8]
+; KNL-NEXT:    kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
+; KNL-NEXT:    korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
 ; KNL-NEXT:    vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
 ; KNL-NEXT:    vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
 ; KNL-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
@@ -1465,8 +1465,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
 ;
 ; AVX512BW-LABEL: half_vec_compare:
 ; AVX512BW:       ## %bb.0: ## %entry
-; AVX512BW-NEXT:    movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
-; AVX512BW-NEXT:    movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
+; AVX512BW-NEXT:    movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
+; AVX512BW-NEXT:    movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
 ; AVX512BW-NEXT:    vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
 ; AVX512BW-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; AVX512BW-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@@ -1474,17 +1474,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
 ; AVX512BW-NEXT:    setp %cl ## encoding: [0x0f,0x9a,0xc1]
 ; AVX512BW-NEXT:    setne %dl ## encoding: [0x0f,0x95,0xc2]
 ; AVX512BW-NEXT:    orb %cl, %dl ## encoding: [0x08,0xca]
-; AVX512BW-NEXT:    kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
-; AVX512BW-NEXT:    kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
+; AVX512BW-NEXT:    andl $1, %edx ## encoding: [0x83,0xe2,0x01]
+; AVX512BW-NEXT:    kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
 ; AVX512BW-NEXT:    vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
 ; AVX512BW-NEXT:    vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
 ; AVX512BW-NEXT:    vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
 ; AVX512BW-NEXT:    setp %al ## encoding: [0x0f,0x9a,0xc0]
 ; AVX512BW-NEXT:    setne %cl ## encoding: [0x0f,0x95,0xc1]
 ; AVX512BW-NEXT:    orb %al, %cl ## encoding: [0x08,0xc1]
-; AVX512BW-NEXT:    andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
-; AVX512BW-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
-; AVX512BW-NEXT:    korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
+; AVX512BW-NEXT:    kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
+; AVX512BW-NEXT:    kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
+; AVX512BW-NEXT:    korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
 ; AVX512BW-NEXT:    vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
 ; AVX512BW-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
 ; AVX512BW-NEXT:    vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]

diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index e1f780da4fce6..6bee501e06a40 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1372,48 +1372,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ;
 ; AVX512-LABEL: saddo_v2i128:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rbp
+; AVX512-NEXT:    pushq %r14
 ; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    testq %r9, %r9
-; AVX512-NEXT:    setns %al
-; AVX512-NEXT:    testq %rsi, %rsi
-; AVX512-NEXT:    setns %bl
-; AVX512-NEXT:    cmpb %al, %bl
-; AVX512-NEXT:    sete %bpl
-; AVX512-NEXT:    addq %r8, %rdi
-; AVX512-NEXT:    adcq %r9, %rsi
-; AVX512-NEXT:    setns %al
-; AVX512-NEXT:    cmpb %al, %bl
-; AVX512-NEXT:    setne %al
-; AVX512-NEXT:    andb %bpl, %al
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT:    movq %rcx, %rbp
-; AVX512-NEXT:    adcq %r10, %rbp
+; AVX512-NEXT:    movq %rcx, %r14
+; AVX512-NEXT:    adcq %r11, %r14
 ; AVX512-NEXT:    setns %bl
 ; AVX512-NEXT:    testq %rcx, %rcx
 ; AVX512-NEXT:    setns %cl
 ; AVX512-NEXT:    cmpb %bl, %cl
-; AVX512-NEXT:    setne %r8b
-; AVX512-NEXT:    testq %r10, %r10
+; AVX512-NEXT:    setne %bl
+; AVX512-NEXT:    testq %r11, %r11
+; AVX512-NEXT:    setns %al
+; AVX512-NEXT:    cmpb %al, %cl
+; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    andb %bl, %al
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    testq %r9, %r9
+; AVX512-NEXT:    setns %al
+; AVX512-NEXT:    testq %rsi, %rsi
+; AVX512-NEXT:    setns %cl
+; AVX512-NEXT:    cmpb %al, %cl
+; AVX512-NEXT:    sete %al
+; AVX512-NEXT:    addq %r8, %rdi
+; AVX512-NEXT:    adcq %r9, %rsi
 ; AVX512-NEXT:    setns %bl
 ; AVX512-NEXT:    cmpb %bl, %cl
-; AVX512-NEXT:    sete %cl
-; AVX512-NEXT:    andb %r8b, %cl
-; AVX512-NEXT:    kmovd %ecx, %k0
+; AVX512-NEXT:    setne %cl
+; AVX512-NEXT:    andb %al, %cl
+; AVX512-NEXT:    andl $1, %ecx
+; AVX512-NEXT:    kmovw %ecx, %k1
 ; AVX512-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512-NEXT:    andl $1, %eax
-; AVX512-NEXT:    kmovw %eax, %k1
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    movq %rdx, 16(%r11)
-; AVX512-NEXT:    movq %rdi, (%r11)
-; AVX512-NEXT:    movq %rbp, 24(%r11)
-; AVX512-NEXT:    movq %rsi, 8(%r11)
+; AVX512-NEXT:    movq %rdx, 16(%r10)
+; AVX512-NEXT:    movq %rdi, (%r10)
+; AVX512-NEXT:    movq %r14, 24(%r10)
+; AVX512-NEXT:    movq %rsi, 8(%r10)
 ; AVX512-NEXT:    popq %rbx
-; AVX512-NEXT:    popq %rbp
+; AVX512-NEXT:    popq %r14
 ; AVX512-NEXT:    retq
   %t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index ad0a8f8ff12a0..1b5aef61ebf3b 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3942,39 +3942,39 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    pushq %rbx
 ; AVX512-NEXT:    subq $24, %rsp
 ; AVX512-NEXT:    movq %r8, %rax
-; AVX512-NEXT:    movq %rcx, %r15
+; AVX512-NEXT:    movq %rcx, %r14
 ; AVX512-NEXT:    movq %rdx, %rbx
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r15
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r12
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r13
 ; AVX512-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; AVX512-NEXT:    movq %rax, %rdx
 ; AVX512-NEXT:    movq %r9, %rcx
 ; AVX512-NEXT:    callq __muloti4
-; AVX512-NEXT:    movq %rax, %r14
+; AVX512-NEXT:    movq %rax, %r13
 ; AVX512-NEXT:    movq %rdx, %rbp
 ; AVX512-NEXT:    movq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    leaq {{[0-9]+}}(%rsp), %r8
 ; AVX512-NEXT:    movq %rbx, %rdi
-; AVX512-NEXT:    movq %r15, %rsi
+; AVX512-NEXT:    movq %r14, %rsi
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT:    movq %r13, %rcx
+; AVX512-NEXT:    movq %r12, %rcx
 ; AVX512-NEXT:    callq __muloti4
 ; AVX512-NEXT:    cmpq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    setne %cl
 ; AVX512-NEXT:    kmovd %ecx, %k0
 ; AVX512-NEXT:    cmpq $0, {{[0-9]+}}(%rsp)
 ; AVX512-NEXT:    setne %cl
-; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    andl $1, %ecx
 ; AVX512-NEXT:    kmovw %ecx, %k1
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    movq %rdx, 24(%r12)
-; AVX512-NEXT:    movq %rax, 16(%r12)
-; AVX512-NEXT:    movq %rbp, 8(%r12)
-; AVX512-NEXT:    movq %r14, (%r12)
+; AVX512-NEXT:    movq %rdx, 24(%r15)
+; AVX512-NEXT:    movq %rax, 16(%r15)
+; AVX512-NEXT:    movq %rbp, 8(%r15)
+; AVX512-NEXT:    movq %r13, (%r15)
 ; AVX512-NEXT:    addq $24, %rsp
 ; AVX512-NEXT:    popq %rbx
 ; AVX512-NEXT:    popq %r12

diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 4e2c3a57831f5..9981643ba2d4c 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1381,48 +1381,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ;
 ; AVX512-LABEL: ssubo_v2i128:
 ; AVX512:       # %bb.0:
-; AVX512-NEXT:    pushq %rbp
+; AVX512-NEXT:    pushq %r14
 ; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    testq %r9, %r9
-; AVX512-NEXT:    setns %al
-; AVX512-NEXT:    testq %rsi, %rsi
-; AVX512-NEXT:    setns %bl
-; AVX512-NEXT:    cmpb %al, %bl
-; AVX512-NEXT:    setne %bpl
-; AVX512-NEXT:    subq %r8, %rdi
-; AVX512-NEXT:    sbbq %r9, %rsi
-; AVX512-NEXT:    setns %al
-; AVX512-NEXT:    cmpb %al, %bl
-; AVX512-NEXT:    setne %al
-; AVX512-NEXT:    andb %bpl, %al
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r11
 ; AVX512-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT:    movq %rcx, %rbp
-; AVX512-NEXT:    sbbq %r10, %rbp
+; AVX512-NEXT:    movq %rcx, %r14
+; AVX512-NEXT:    sbbq %r11, %r14
 ; AVX512-NEXT:    setns %bl
 ; AVX512-NEXT:    testq %rcx, %rcx
 ; AVX512-NEXT:    setns %cl
 ; AVX512-NEXT:    cmpb %bl, %cl
-; AVX512-NEXT:    setne %r8b
-; AVX512-NEXT:    testq %r10, %r10
+; AVX512-NEXT:    setne %bl
+; AVX512-NEXT:    testq %r11, %r11
+; AVX512-NEXT:    setns %al
+; AVX512-NEXT:    cmpb %al, %cl
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    andb %bl, %al
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    testq %r9, %r9
+; AVX512-NEXT:    setns %al
+; AVX512-NEXT:    testq %rsi, %rsi
+; AVX512-NEXT:    setns %cl
+; AVX512-NEXT:    cmpb %al, %cl
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    subq %r8, %rdi
+; AVX512-NEXT:    sbbq %r9, %rsi
 ; AVX512-NEXT:    setns %bl
 ; AVX512-NEXT:    cmpb %bl, %cl
 ; AVX512-NEXT:    setne %cl
-; AVX512-NEXT:    andb %r8b, %cl
-; AVX512-NEXT:    kmovd %ecx, %k0
+; AVX512-NEXT:    andb %al, %cl
+; AVX512-NEXT:    andl $1, %ecx
+; AVX512-NEXT:    kmovw %ecx, %k1
 ; AVX512-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512-NEXT:    andl $1, %eax
-; AVX512-NEXT:    kmovw %eax, %k1
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    movq %rdx, 16(%r11)
-; AVX512-NEXT:    movq %rdi, (%r11)
-; AVX512-NEXT:    movq %rbp, 24(%r11)
-; AVX512-NEXT:    movq %rsi, 8(%r11)
+; AVX512-NEXT:    movq %rdx, 16(%r10)
+; AVX512-NEXT:    movq %rdi, (%r10)
+; AVX512-NEXT:    movq %r14, 24(%r10)
+; AVX512-NEXT:    movq %rsi, 8(%r10)
 ; AVX512-NEXT:    popq %rbx
-; AVX512-NEXT:    popq %rbp
+; AVX512-NEXT:    popq %r14
 ; AVX512-NEXT:    retq
   %t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
   %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0

diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 9a153253a1695..c34653be4a02c 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-LABEL: uaddo_v2i128:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    addq %r8, %rdi
-; AVX512-NEXT:    adcq %r9, %rsi
-; AVX512-NEXT:    setb %r8b
 ; AVX512-NEXT:    addq {{[0-9]+}}(%rsp), %rdx
 ; AVX512-NEXT:    adcq {{[0-9]+}}(%rsp), %rcx
 ; AVX512-NEXT:    setb %al
 ; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    addq %r8, %rdi
+; AVX512-NEXT:    adcq %r9, %rsi
+; AVX512-NEXT:    setb %al
+; AVX512-NEXT:    andl $1, %eax
+; AVX512-NEXT:    kmovw %eax, %k1
 ; AVX512-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512-NEXT:    andl $1, %r8d
-; AVX512-NEXT:    kmovw %r8d, %k1
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}

diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 54bb86dc5a0f4..cc25fd5bec783 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -3689,68 +3689,66 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-NEXT:    pushq %r13
 ; AVX512-NEXT:    pushq %r12
 ; AVX512-NEXT:    pushq %rbx
-; AVX512-NEXT:    movq %r9, %r10
-; AVX512-NEXT:    movq %rcx, %r9
-; AVX512-NEXT:    movq %rdx, %r11
-; AVX512-NEXT:    movq %rsi, %rax
-; AVX512-NEXT:    movq %rdi, %rsi
+; AVX512-NEXT:    movq %rcx, %rax
+; AVX512-NEXT:    movq %rdx, %r12
+; AVX512-NEXT:    movq %rdi, %r11
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r14
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r15
-; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r12
+; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
 ; AVX512-NEXT:    testq %r10, %r10
 ; AVX512-NEXT:    setne %dl
-; AVX512-NEXT:    testq %rax, %rax
-; AVX512-NEXT:    setne %bl
-; AVX512-NEXT:    andb %dl, %bl
-; AVX512-NEXT:    mulq %r8
-; AVX512-NEXT:    movq %rax, %r13
+; AVX512-NEXT:    testq %rcx, %rcx
+; AVX512-NEXT:    setne %r13b
+; AVX512-NEXT:    andb %dl, %r13b
+; AVX512-NEXT:    mulq %r15
+; AVX512-NEXT:    movq %rax, %rdi
 ; AVX512-NEXT:    seto %bpl
 ; AVX512-NEXT:    movq %r10, %rax
-; AVX512-NEXT:    mulq %rdi
-; AVX512-NEXT:    movq %rax, %rdi
+; AVX512-NEXT:    mulq %r12
+; AVX512-NEXT:    movq %rax, %rbx
 ; AVX512-NEXT:    seto %cl
 ; AVX512-NEXT:    orb %bpl, %cl
-; AVX512-NEXT:    addq %r13, %rdi
+; AVX512-NEXT:    addq %rdi, %rbx
+; AVX512-NEXT:    movq %r12, %rax
+; AVX512-NEXT:    mulq %r15
+; AVX512-NEXT:    movq %rax, %r10
+; AVX512-NEXT:    movq %rdx, %r15
+; AVX512-NEXT:    addq %rbx, %r15
+; AVX512-NEXT:    setb %al
+; AVX512-NEXT:    orb %cl, %al
+; AVX512-NEXT:    orb %r13b, %al
+; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    testq %r9, %r9
+; AVX512-NEXT:    setne %al
+; AVX512-NEXT:    testq %rsi, %rsi
+; AVX512-NEXT:    setne %cl
+; AVX512-NEXT:    andb %al, %cl
 ; AVX512-NEXT:    movq %rsi, %rax
 ; AVX512-NEXT:    mulq %r8
-; AVX512-NEXT:    movq %rax, %r8
-; AVX512-NEXT:    movq %rdx, %r10
-; AVX512-NEXT:    addq %rdi, %r10
-; AVX512-NEXT:    setb %sil
-; AVX512-NEXT:    orb %cl, %sil
-; AVX512-NEXT:    orb %bl, %sil
-; AVX512-NEXT:    testq %r12, %r12
-; AVX512-NEXT:    setne %al
-; AVX512-NEXT:    testq %r9, %r9
-; AVX512-NEXT:    setne %bpl
-; AVX512-NEXT:    andb %al, %bpl
+; AVX512-NEXT:    movq %rax, %rsi
+; AVX512-NEXT:    seto %bpl
 ; AVX512-NEXT:    movq %r9, %rax
-; AVX512-NEXT:    mulq %r15
-; AVX512-NEXT:    movq %rax, %rdi
-; AVX512-NEXT:    seto %r9b
-; AVX512-NEXT:    movq %r12, %rax
 ; AVX512-NEXT:    mulq %r11
-; AVX512-NEXT:    movq %rax, %rbx
-; AVX512-NEXT:    seto %cl
-; AVX512-NEXT:    orb %r9b, %cl
-; AVX512-NEXT:    addq %rdi, %rbx
+; AVX512-NEXT:    movq %rax, %rdi
+; AVX512-NEXT:    seto %bl
+; AVX512-NEXT:    orb %bpl, %bl
+; AVX512-NEXT:    addq %rsi, %rdi
 ; AVX512-NEXT:    movq %r11, %rax
-; AVX512-NEXT:    mulq %r15
-; AVX512-NEXT:    addq %rbx, %rdx
-; AVX512-NEXT:    setb %dil
-; AVX512-NEXT:    orb %cl, %dil
-; AVX512-NEXT:    orb %bpl, %dil
-; AVX512-NEXT:    kmovd %edi, %k0
-; AVX512-NEXT:    kshiftlw $1, %k0, %k0
+; AVX512-NEXT:    mulq %r8
+; AVX512-NEXT:    addq %rdi, %rdx
+; AVX512-NEXT:    setb %sil
+; AVX512-NEXT:    orb %bl, %sil
+; AVX512-NEXT:    orb %cl, %sil
 ; AVX512-NEXT:    andl $1, %esi
 ; AVX512-NEXT:    kmovw %esi, %k1
+; AVX512-NEXT:    kshiftlw $1, %k0, %k0
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT:    movq %rax, 16(%r14)
-; AVX512-NEXT:    movq %r8, (%r14)
-; AVX512-NEXT:    movq %rdx, 24(%r14)
-; AVX512-NEXT:    movq %r10, 8(%r14)
+; AVX512-NEXT:    movq %r10, 16(%r14)
+; AVX512-NEXT:    movq %rax, (%r14)
+; AVX512-NEXT:    movq %r15, 24(%r14)
+; AVX512-NEXT:    movq %rdx, 8(%r14)
 ; AVX512-NEXT:    popq %rbx
 ; AVX512-NEXT:    popq %r12
 ; AVX512-NEXT:    popq %r13

diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index 0381394e74134..76c3e5ad32909 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
 ; AVX512-LABEL: usubo_v2i128:
 ; AVX512:       # %bb.0:
 ; AVX512-NEXT:    movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT:    subq %r8, %rdi
-; AVX512-NEXT:    sbbq %r9, %rsi
-; AVX512-NEXT:    setb %r8b
 ; AVX512-NEXT:    subq {{[0-9]+}}(%rsp), %rdx
 ; AVX512-NEXT:    sbbq {{[0-9]+}}(%rsp), %rcx
 ; AVX512-NEXT:    setb %al
 ; AVX512-NEXT:    kmovd %eax, %k0
+; AVX512-NEXT:    subq %r8, %rdi
+; AVX512-NEXT:    sbbq %r9, %rsi
+; AVX512-NEXT:    setb %al
+; AVX512-NEXT:    andl $1, %eax
+; AVX512-NEXT:    kmovw %eax, %k1
 ; AVX512-NEXT:    kshiftlw $1, %k0, %k0
-; AVX512-NEXT:    andl $1, %r8d
-; AVX512-NEXT:    kmovw %r8d, %k1
 ; AVX512-NEXT:    korw %k0, %k1, %k1
 ; AVX512-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
 ; AVX512-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}


        


More information about the llvm-commits mailing list