[llvm] ba319ac - [X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 25 15:19:58 PDT 2020
Author: Craig Topper
Date: 2020-08-25T15:19:27-07:00
New Revision: ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69
URL: https://github.com/llvm/llvm-project/commit/ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69
DIFF: https://github.com/llvm/llvm-project/commit/ba319ac47eeabc44673513ec7ab9e1e8e3b4ec69.diff
LOG: [X86] Remove a redundant COPY_TO_REGCLASS for VK16 after a KMOVWkr in an isel output pattern.
KMOVWkr already produces a VK16 register, so there's no reason to copy it to VK16 again.
The test changes are presumably because scheduling was previously
based on the COPY that is no longer there.
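
As a minimal IR sketch (a hypothetical reduction, not one of the tests
touched here), an input of the following shape should select the pattern
changed below -- an i8-derived i1 inserted at lane 0 of a zeroed
<16 x i1> mask -- when compiled with llc -mtriple=x86_64 -mattr=+avx512f:

  ; Insert a truncated bit into element 0 of an all-zeros v16i1 mask;
  ; this corresponds to (insert_subvector (v16i1 immAllZerosV),
  ; (v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)) in the DAG.
  define i16 @insert_i1_into_zero_mask(i8 %x) {
    %b = trunc i8 %x to i1
    %v = insertelement <16 x i1> zeroinitializer, i1 %b, i64 0
    %r = bitcast <16 x i1> %v to i16
    ret i16 %r
  }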
Added:
Modified:
llvm/lib/Target/X86/X86InstrAVX512.td
llvm/test/CodeGen/X86/avx512-ext.ll
llvm/test/CodeGen/X86/avx512-insert-extract.ll
llvm/test/CodeGen/X86/avx512-mask-op.ll
llvm/test/CodeGen/X86/avx512-vec-cmp.ll
llvm/test/CodeGen/X86/vec_saddo.ll
llvm/test/CodeGen/X86/vec_smulo.ll
llvm/test/CodeGen/X86/vec_ssubo.ll
llvm/test/CodeGen/X86/vec_uaddo.ll
llvm/test/CodeGen/X86/vec_umulo.ll
llvm/test/CodeGen/X86/vec_usubo.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f9582238d30ff..0514a3a3611cc 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2967,10 +2967,9 @@ let Predicates = [HasAVX512] in {
def : Pat<(insert_subvector (v16i1 immAllZerosV),
(v1i1 (scalar_to_vector GR8:$src)), (iPTR 0)),
- (COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8
- (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
- (i32 1))), VK16)>;
+ (KMOVWkr (AND32ri8
+ (INSERT_SUBREG (i32 (IMPLICIT_DEF)), GR8:$src, sub_8bit),
+ (i32 1)))>;
}
// Mask unary operation
diff --git a/llvm/test/CodeGen/X86/avx512-ext.ll b/llvm/test/CodeGen/X86/avx512-ext.ll
index 5ce111806a2cc..fc1ba049c6912 100644
--- a/llvm/test/CodeGen/X86/avx512-ext.ll
+++ b/llvm/test/CodeGen/X86/avx512-ext.ll
@@ -1766,39 +1766,39 @@ define <8 x i32> @sext_8i1_8i32(<8 x i32> %a1, <8 x i32> %a2) nounwind {
define i16 @trunc_i32_to_i1(i32 %a) {
; KNL-LABEL: trunc_i32_to_i1:
; KNL: # %bb.0:
-; KNL-NEXT: movw $-4, %ax
-; KNL-NEXT: kmovw %eax, %k0
-; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: andl $1, %edi
-; KNL-NEXT: kmovw %edi, %k1
-; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: kmovw %edi, %k0
+; KNL-NEXT: movw $-4, %ax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, %eax
; KNL-NEXT: # kill: def $ax killed $ax killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: trunc_i32_to_i1:
; SKX: # %bb.0:
-; SKX-NEXT: movw $-4, %ax
-; SKX-NEXT: kmovd %eax, %k0
-; SKX-NEXT: kshiftrw $1, %k0, %k0
-; SKX-NEXT: kshiftlw $1, %k0, %k0
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: kmovw %edi, %k1
-; SKX-NEXT: korw %k1, %k0, %k0
+; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: movw $-4, %ax
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: kshiftrw $1, %k1, %k1
+; SKX-NEXT: kshiftlw $1, %k1, %k1
+; SKX-NEXT: korw %k0, %k1, %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: # kill: def $ax killed $ax killed $eax
; SKX-NEXT: retq
;
; AVX512DQNOBW-LABEL: trunc_i32_to_i1:
; AVX512DQNOBW: # %bb.0:
-; AVX512DQNOBW-NEXT: movw $-4, %ax
-; AVX512DQNOBW-NEXT: kmovw %eax, %k0
-; AVX512DQNOBW-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQNOBW-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQNOBW-NEXT: andl $1, %edi
-; AVX512DQNOBW-NEXT: kmovw %edi, %k1
-; AVX512DQNOBW-NEXT: korw %k1, %k0, %k0
+; AVX512DQNOBW-NEXT: kmovw %edi, %k0
+; AVX512DQNOBW-NEXT: movw $-4, %ax
+; AVX512DQNOBW-NEXT: kmovw %eax, %k1
+; AVX512DQNOBW-NEXT: kshiftrw $1, %k1, %k1
+; AVX512DQNOBW-NEXT: kshiftlw $1, %k1, %k1
+; AVX512DQNOBW-NEXT: korw %k0, %k1, %k0
; AVX512DQNOBW-NEXT: kmovw %k0, %eax
; AVX512DQNOBW-NEXT: # kill: def $ax killed $ax killed $eax
; AVX512DQNOBW-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/avx512-insert-extract.ll b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
index 41bdaf21baa38..fd722e1beb135 100644
--- a/llvm/test/CodeGen/X86/avx512-insert-extract.ll
+++ b/llvm/test/CodeGen/X86/avx512-insert-extract.ll
@@ -2181,32 +2181,32 @@ define i128 @test_insertelement_variable_v128i1(<128 x i8> %a, i8 %b, i32 %index
define void @test_concat_v2i1(<2 x half>* %arg, <2 x half>* %arg1, <2 x half>* %arg2) {
; KNL-LABEL: test_concat_v2i1:
; KNL: ## %bb.0:
-; KNL-NEXT: movzwl (%rdi), %eax
-; KNL-NEXT: movzwl 2(%rdi), %ecx
+; KNL-NEXT: movzwl 2(%rdi), %eax
+; KNL-NEXT: movzwl (%rdi), %ecx
; KNL-NEXT: vmovd %ecx, %xmm0
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0
; KNL-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; KNL-NEXT: vucomiss %xmm1, %xmm0
; KNL-NEXT: setb %cl
+; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: kmovw %ecx, %k0
-; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: vmovd %eax, %xmm2
; KNL-NEXT: vcvtph2ps %xmm2, %xmm2
; KNL-NEXT: vucomiss %xmm1, %xmm2
; KNL-NEXT: setb %al
-; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: korw %k0, %k1, %k0
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; KNL-NEXT: vucomiss %xmm1, %xmm0
; KNL-NEXT: seta %al
+; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
-; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: vucomiss %xmm1, %xmm2
; KNL-NEXT: seta %al
-; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k2
-; KNL-NEXT: korw %k1, %k2, %k1
+; KNL-NEXT: kshiftlw $1, %k2, %k2
+; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
diff --git a/llvm/test/CodeGen/X86/avx512-mask-op.ll b/llvm/test/CodeGen/X86/avx512-mask-op.ll
index e67b81581396d..67067e3fff27d 100644
--- a/llvm/test/CodeGen/X86/avx512-mask-op.ll
+++ b/llvm/test/CodeGen/X86/avx512-mask-op.ll
@@ -5157,13 +5157,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
; KNL-LABEL: mask64_insert:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
-; KNL-NEXT: movw $-4, %cx
-; KNL-NEXT: kmovw %ecx, %k0
-; KNL-NEXT: kshiftrw $1, %k0, %k0
-; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: kmovw %esi, %k1
-; KNL-NEXT: korw %k1, %k0, %k0
+; KNL-NEXT: kmovw %esi, %k0
+; KNL-NEXT: movw $-4, %cx
+; KNL-NEXT: kmovw %ecx, %k1
+; KNL-NEXT: kshiftrw $1, %k1, %k1
+; KNL-NEXT: kshiftlw $1, %k1, %k1
+; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: kmovw %k0, (%rdi)
; KNL-NEXT: movw $-3, 6(%rdi)
; KNL-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
@@ -5198,13 +5198,13 @@ define <64 x i1> @mask64_insert(i32 %a) {
; AVX512DQ-LABEL: mask64_insert:
; AVX512DQ: ## %bb.0:
; AVX512DQ-NEXT: movq %rdi, %rax
-; AVX512DQ-NEXT: movw $-4, %cx
-; AVX512DQ-NEXT: kmovw %ecx, %k0
-; AVX512DQ-NEXT: kshiftrw $1, %k0, %k0
-; AVX512DQ-NEXT: kshiftlw $1, %k0, %k0
; AVX512DQ-NEXT: andl $1, %esi
-; AVX512DQ-NEXT: kmovw %esi, %k1
-; AVX512DQ-NEXT: korw %k1, %k0, %k0
+; AVX512DQ-NEXT: kmovw %esi, %k0
+; AVX512DQ-NEXT: movw $-4, %cx
+; AVX512DQ-NEXT: kmovw %ecx, %k1
+; AVX512DQ-NEXT: kshiftrw $1, %k1, %k1
+; AVX512DQ-NEXT: kshiftlw $1, %k1, %k1
+; AVX512DQ-NEXT: korw %k0, %k1, %k0
; AVX512DQ-NEXT: kmovw %k0, (%rdi)
; AVX512DQ-NEXT: movw $-3, 6(%rdi)
; AVX512DQ-NEXT: movl $-131075, 2(%rdi) ## imm = 0xFFFDFFFD
diff --git a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
index 719bd9f9d95f0..7dcae9a2d24ac 100644
--- a/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512-vec-cmp.ll
@@ -1434,8 +1434,8 @@ define <4 x i32> @zext_bool_logic(<4 x i64> %cond1, <4 x i64> %cond2, <4 x i32>
define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-LABEL: half_vec_compare:
; KNL: ## %bb.0: ## %entry
-; KNL-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
-; KNL-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
+; KNL-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
+; KNL-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; KNL-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@@ -1443,17 +1443,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; KNL-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; KNL-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; KNL-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
+; KNL-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
; KNL-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
-; KNL-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
; KNL-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; KNL-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; KNL-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; KNL-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; KNL-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; KNL-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
-; KNL-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
; KNL-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
-; KNL-NEXT: korw %k0, %k1, %k1 ## encoding: [0xc5,0xf4,0x45,0xc8]
+; KNL-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
+; KNL-NEXT: korw %k1, %k0, %k1 ## encoding: [0xc5,0xfc,0x45,0xc9]
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x25,0xc0,0xff]
; KNL-NEXT: vpmovdw %zmm0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x48,0x33,0xc0]
; KNL-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
@@ -1465,8 +1465,8 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
;
; AVX512BW-LABEL: half_vec_compare:
; AVX512BW: ## %bb.0: ## %entry
-; AVX512BW-NEXT: movzwl (%rdi), %eax ## encoding: [0x0f,0xb7,0x07]
-; AVX512BW-NEXT: movzwl 2(%rdi), %ecx ## encoding: [0x0f,0xb7,0x4f,0x02]
+; AVX512BW-NEXT: movzwl 2(%rdi), %eax ## encoding: [0x0f,0xb7,0x47,0x02]
+; AVX512BW-NEXT: movzwl (%rdi), %ecx ## encoding: [0x0f,0xb7,0x0f]
; AVX512BW-NEXT: vmovd %ecx, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
@@ -1474,17 +1474,17 @@ define void @half_vec_compare(<2 x half>* %x, <2 x i8>* %y) {
; AVX512BW-NEXT: setp %cl ## encoding: [0x0f,0x9a,0xc1]
; AVX512BW-NEXT: setne %dl ## encoding: [0x0f,0x95,0xc2]
; AVX512BW-NEXT: orb %cl, %dl ## encoding: [0x08,0xca]
-; AVX512BW-NEXT: kmovd %edx, %k0 ## encoding: [0xc5,0xfb,0x92,0xc2]
-; AVX512BW-NEXT: kshiftlw $1, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x01]
+; AVX512BW-NEXT: andl $1, %edx ## encoding: [0x83,0xe2,0x01]
+; AVX512BW-NEXT: kmovw %edx, %k0 ## encoding: [0xc5,0xf8,0x92,0xc2]
; AVX512BW-NEXT: vmovd %eax, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0]
; AVX512BW-NEXT: vcvtph2ps %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x13,0xc0]
; AVX512BW-NEXT: vucomiss %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2e,0xc1]
; AVX512BW-NEXT: setp %al ## encoding: [0x0f,0x9a,0xc0]
; AVX512BW-NEXT: setne %cl ## encoding: [0x0f,0x95,0xc1]
; AVX512BW-NEXT: orb %al, %cl ## encoding: [0x08,0xc1]
-; AVX512BW-NEXT: andl $1, %ecx ## encoding: [0x83,0xe1,0x01]
-; AVX512BW-NEXT: kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
-; AVX512BW-NEXT: korw %k0, %k1, %k0 ## encoding: [0xc5,0xf4,0x45,0xc0]
+; AVX512BW-NEXT: kmovd %ecx, %k1 ## encoding: [0xc5,0xfb,0x92,0xc9]
+; AVX512BW-NEXT: kshiftlw $1, %k1, %k1 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc9,0x01]
+; AVX512BW-NEXT: korw %k1, %k0, %k0 ## encoding: [0xc5,0xfc,0x45,0xc1]
; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 ## encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
; AVX512BW-NEXT: vpacksswb %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x63,0xc0]
; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xdb,0x05,A,A,A,A]
diff --git a/llvm/test/CodeGen/X86/vec_saddo.ll b/llvm/test/CodeGen/X86/vec_saddo.ll
index e1f780da4fce6..6bee501e06a40 100644
--- a/llvm/test/CodeGen/X86/vec_saddo.ll
+++ b/llvm/test/CodeGen/X86/vec_saddo.ll
@@ -1372,48 +1372,48 @@ define <2 x i32> @saddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
;
; AVX512-LABEL: saddo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %al, %bl
-; AVX512-NEXT: sete %bpl
-; AVX512-NEXT: addq %r8, %rdi
-; AVX512-NEXT: adcq %r9, %rsi
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %bl
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: andb %bpl, %al
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %rbp
-; AVX512-NEXT: adcq %r10, %rbp
+; AVX512-NEXT: movq %rcx, %r14
+; AVX512-NEXT: adcq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %r8b
-; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: setne %bl
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: cmpb %al, %cl
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: setns %cl
+; AVX512-NEXT: cmpb %al, %cl
+; AVX512-NEXT: sete %al
+; AVX512-NEXT: addq %r8, %rdi
+; AVX512-NEXT: adcq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: sete %cl
-; AVX512-NEXT: andb %r8b, %cl
-; AVX512-NEXT: kmovd %ecx, %k0
+; AVX512-NEXT: setne %cl
+; AVX512-NEXT: andb %al, %cl
+; AVX512-NEXT: andl $1, %ecx
+; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: andl $1, %eax
-; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rdx, 16(%r11)
-; AVX512-NEXT: movq %rdi, (%r11)
-; AVX512-NEXT: movq %rbp, 24(%r11)
-; AVX512-NEXT: movq %rsi, 8(%r11)
+; AVX512-NEXT: movq %rdx, 16(%r10)
+; AVX512-NEXT: movq %rdi, (%r10)
+; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.sadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/X86/vec_smulo.ll b/llvm/test/CodeGen/X86/vec_smulo.ll
index ad0a8f8ff12a0..1b5aef61ebf3b 100644
--- a/llvm/test/CodeGen/X86/vec_smulo.ll
+++ b/llvm/test/CodeGen/X86/vec_smulo.ll
@@ -3942,39 +3942,39 @@ define <2 x i32> @smulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-NEXT: pushq %rbx
; AVX512-NEXT: subq $24, %rsp
; AVX512-NEXT: movq %r8, %rax
-; AVX512-NEXT: movq %rcx, %r15
+; AVX512-NEXT: movq %rcx, %r14
; AVX512-NEXT: movq %rdx, %rbx
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r13
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; AVX512-NEXT: movq %rax, %rdx
; AVX512-NEXT: movq %r9, %rcx
; AVX512-NEXT: callq __muloti4
-; AVX512-NEXT: movq %rax, %r14
+; AVX512-NEXT: movq %rax, %r13
; AVX512-NEXT: movq %rdx, %rbp
; AVX512-NEXT: movq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: leaq {{[0-9]+}}(%rsp), %r8
; AVX512-NEXT: movq %rbx, %rdi
-; AVX512-NEXT: movq %r15, %rsi
+; AVX512-NEXT: movq %r14, %rsi
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %r13, %rcx
+; AVX512-NEXT: movq %r12, %rcx
; AVX512-NEXT: callq __muloti4
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
; AVX512-NEXT: kmovd %ecx, %k0
; AVX512-NEXT: cmpq $0, {{[0-9]+}}(%rsp)
; AVX512-NEXT: setne %cl
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: andl $1, %ecx
; AVX512-NEXT: kmovw %ecx, %k1
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rdx, 24(%r12)
-; AVX512-NEXT: movq %rax, 16(%r12)
-; AVX512-NEXT: movq %rbp, 8(%r12)
-; AVX512-NEXT: movq %r14, (%r12)
+; AVX512-NEXT: movq %rdx, 24(%r15)
+; AVX512-NEXT: movq %rax, 16(%r15)
+; AVX512-NEXT: movq %rbp, 8(%r15)
+; AVX512-NEXT: movq %r13, (%r15)
; AVX512-NEXT: addq $24, %rsp
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
diff --git a/llvm/test/CodeGen/X86/vec_ssubo.ll b/llvm/test/CodeGen/X86/vec_ssubo.ll
index 4e2c3a57831f5..9981643ba2d4c 100644
--- a/llvm/test/CodeGen/X86/vec_ssubo.ll
+++ b/llvm/test/CodeGen/X86/vec_ssubo.ll
@@ -1381,48 +1381,48 @@ define <2 x i32> @ssubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
;
; AVX512-LABEL: ssubo_v2i128:
; AVX512: # %bb.0:
-; AVX512-NEXT: pushq %rbp
+; AVX512-NEXT: pushq %r14
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: testq %rsi, %rsi
-; AVX512-NEXT: setns %bl
-; AVX512-NEXT: cmpb %al, %bl
-; AVX512-NEXT: setne %bpl
-; AVX512-NEXT: subq %r8, %rdi
-; AVX512-NEXT: sbbq %r9, %rsi
-; AVX512-NEXT: setns %al
-; AVX512-NEXT: cmpb %al, %bl
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: andb %bpl, %al
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r11
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
-; AVX512-NEXT: movq %rcx, %rbp
-; AVX512-NEXT: sbbq %r10, %rbp
+; AVX512-NEXT: movq %rcx, %r14
+; AVX512-NEXT: sbbq %r11, %r14
; AVX512-NEXT: setns %bl
; AVX512-NEXT: testq %rcx, %rcx
; AVX512-NEXT: setns %cl
; AVX512-NEXT: cmpb %bl, %cl
-; AVX512-NEXT: setne %r8b
-; AVX512-NEXT: testq %r10, %r10
+; AVX512-NEXT: setne %bl
+; AVX512-NEXT: testq %r11, %r11
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: cmpb %al, %cl
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: andb %bl, %al
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: setns %al
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: setns %cl
+; AVX512-NEXT: cmpb %al, %cl
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: subq %r8, %rdi
+; AVX512-NEXT: sbbq %r9, %rsi
; AVX512-NEXT: setns %bl
; AVX512-NEXT: cmpb %bl, %cl
; AVX512-NEXT: setne %cl
-; AVX512-NEXT: andb %r8b, %cl
-; AVX512-NEXT: kmovd %ecx, %k0
+; AVX512-NEXT: andb %al, %cl
+; AVX512-NEXT: andl $1, %ecx
+; AVX512-NEXT: kmovw %ecx, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: andl $1, %eax
-; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rdx, 16(%r11)
-; AVX512-NEXT: movq %rdi, (%r11)
-; AVX512-NEXT: movq %rbp, 24(%r11)
-; AVX512-NEXT: movq %rsi, 8(%r11)
+; AVX512-NEXT: movq %rdx, 16(%r10)
+; AVX512-NEXT: movq %rdi, (%r10)
+; AVX512-NEXT: movq %r14, 24(%r10)
+; AVX512-NEXT: movq %rsi, 8(%r10)
; AVX512-NEXT: popq %rbx
-; AVX512-NEXT: popq %rbp
+; AVX512-NEXT: popq %r14
; AVX512-NEXT: retq
%t = call {<2 x i128>, <2 x i1>} @llvm.ssub.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
%val = extractvalue {<2 x i128>, <2 x i1>} %t, 0
diff --git a/llvm/test/CodeGen/X86/vec_uaddo.ll b/llvm/test/CodeGen/X86/vec_uaddo.ll
index 9a153253a1695..c34653be4a02c 100644
--- a/llvm/test/CodeGen/X86/vec_uaddo.ll
+++ b/llvm/test/CodeGen/X86/vec_uaddo.ll
@@ -1282,16 +1282,16 @@ define <2 x i32> @uaddo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-LABEL: uaddo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: addq %r8, %rdi
-; AVX512-NEXT: adcq %r9, %rsi
-; AVX512-NEXT: setb %r8b
; AVX512-NEXT: addq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: adcq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: addq %r8, %rdi
+; AVX512-NEXT: adcq %r9, %rsi
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: andl $1, %r8d
-; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
diff --git a/llvm/test/CodeGen/X86/vec_umulo.ll b/llvm/test/CodeGen/X86/vec_umulo.ll
index 54bb86dc5a0f4..cc25fd5bec783 100644
--- a/llvm/test/CodeGen/X86/vec_umulo.ll
+++ b/llvm/test/CodeGen/X86/vec_umulo.ll
@@ -3689,68 +3689,66 @@ define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-NEXT: pushq %r13
; AVX512-NEXT: pushq %r12
; AVX512-NEXT: pushq %rbx
-; AVX512-NEXT: movq %r9, %r10
-; AVX512-NEXT: movq %rcx, %r9
-; AVX512-NEXT: movq %rdx, %r11
-; AVX512-NEXT: movq %rsi, %rax
-; AVX512-NEXT: movq %rdi, %rsi
+; AVX512-NEXT: movq %rcx, %rax
+; AVX512-NEXT: movq %rdx, %r12
+; AVX512-NEXT: movq %rdi, %r11
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r14
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r15
-; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r12
+; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
; AVX512-NEXT: testq %r10, %r10
; AVX512-NEXT: setne %dl
-; AVX512-NEXT: testq %rax, %rax
-; AVX512-NEXT: setne %bl
-; AVX512-NEXT: andb %dl, %bl
-; AVX512-NEXT: mulq %r8
-; AVX512-NEXT: movq %rax, %r13
+; AVX512-NEXT: testq %rcx, %rcx
+; AVX512-NEXT: setne %r13b
+; AVX512-NEXT: andb %dl, %r13b
+; AVX512-NEXT: mulq %r15
+; AVX512-NEXT: movq %rax, %rdi
; AVX512-NEXT: seto %bpl
; AVX512-NEXT: movq %r10, %rax
-; AVX512-NEXT: mulq %rdi
-; AVX512-NEXT: movq %rax, %rdi
+; AVX512-NEXT: mulq %r12
+; AVX512-NEXT: movq %rax, %rbx
; AVX512-NEXT: seto %cl
; AVX512-NEXT: orb %bpl, %cl
-; AVX512-NEXT: addq %r13, %rdi
+; AVX512-NEXT: addq %rdi, %rbx
+; AVX512-NEXT: movq %r12, %rax
+; AVX512-NEXT: mulq %r15
+; AVX512-NEXT: movq %rax, %r10
+; AVX512-NEXT: movq %rdx, %r15
+; AVX512-NEXT: addq %rbx, %r15
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: orb %cl, %al
+; AVX512-NEXT: orb %r13b, %al
+; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: testq %r9, %r9
+; AVX512-NEXT: setne %al
+; AVX512-NEXT: testq %rsi, %rsi
+; AVX512-NEXT: setne %cl
+; AVX512-NEXT: andb %al, %cl
; AVX512-NEXT: movq %rsi, %rax
; AVX512-NEXT: mulq %r8
-; AVX512-NEXT: movq %rax, %r8
-; AVX512-NEXT: movq %rdx, %r10
-; AVX512-NEXT: addq %rdi, %r10
-; AVX512-NEXT: setb %sil
-; AVX512-NEXT: orb %cl, %sil
-; AVX512-NEXT: orb %bl, %sil
-; AVX512-NEXT: testq %r12, %r12
-; AVX512-NEXT: setne %al
-; AVX512-NEXT: testq %r9, %r9
-; AVX512-NEXT: setne %bpl
-; AVX512-NEXT: andb %al, %bpl
+; AVX512-NEXT: movq %rax, %rsi
+; AVX512-NEXT: seto %bpl
; AVX512-NEXT: movq %r9, %rax
-; AVX512-NEXT: mulq %r15
-; AVX512-NEXT: movq %rax, %rdi
-; AVX512-NEXT: seto %r9b
-; AVX512-NEXT: movq %r12, %rax
; AVX512-NEXT: mulq %r11
-; AVX512-NEXT: movq %rax, %rbx
-; AVX512-NEXT: seto %cl
-; AVX512-NEXT: orb %r9b, %cl
-; AVX512-NEXT: addq %rdi, %rbx
+; AVX512-NEXT: movq %rax, %rdi
+; AVX512-NEXT: seto %bl
+; AVX512-NEXT: orb %bpl, %bl
+; AVX512-NEXT: addq %rsi, %rdi
; AVX512-NEXT: movq %r11, %rax
-; AVX512-NEXT: mulq %r15
-; AVX512-NEXT: addq %rbx, %rdx
-; AVX512-NEXT: setb %dil
-; AVX512-NEXT: orb %cl, %dil
-; AVX512-NEXT: orb %bpl, %dil
-; AVX512-NEXT: kmovd %edi, %k0
-; AVX512-NEXT: kshiftlw $1, %k0, %k0
+; AVX512-NEXT: mulq %r8
+; AVX512-NEXT: addq %rdi, %rdx
+; AVX512-NEXT: setb %sil
+; AVX512-NEXT: orb %bl, %sil
+; AVX512-NEXT: orb %cl, %sil
; AVX512-NEXT: andl $1, %esi
; AVX512-NEXT: kmovw %esi, %k1
+; AVX512-NEXT: kshiftlw $1, %k0, %k0
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
-; AVX512-NEXT: movq %rax, 16(%r14)
-; AVX512-NEXT: movq %r8, (%r14)
-; AVX512-NEXT: movq %rdx, 24(%r14)
-; AVX512-NEXT: movq %r10, 8(%r14)
+; AVX512-NEXT: movq %r10, 16(%r14)
+; AVX512-NEXT: movq %rax, (%r14)
+; AVX512-NEXT: movq %r15, 24(%r14)
+; AVX512-NEXT: movq %rdx, 8(%r14)
; AVX512-NEXT: popq %rbx
; AVX512-NEXT: popq %r12
; AVX512-NEXT: popq %r13
diff --git a/llvm/test/CodeGen/X86/vec_usubo.ll b/llvm/test/CodeGen/X86/vec_usubo.ll
index 0381394e74134..76c3e5ad32909 100644
--- a/llvm/test/CodeGen/X86/vec_usubo.ll
+++ b/llvm/test/CodeGen/X86/vec_usubo.ll
@@ -1329,16 +1329,16 @@ define <2 x i32> @usubo_v2i128(<2 x i128> %a0, <2 x i128> %a1, <2 x i128>* %p2)
; AVX512-LABEL: usubo_v2i128:
; AVX512: # %bb.0:
; AVX512-NEXT: movq {{[0-9]+}}(%rsp), %r10
-; AVX512-NEXT: subq %r8, %rdi
-; AVX512-NEXT: sbbq %r9, %rsi
-; AVX512-NEXT: setb %r8b
; AVX512-NEXT: subq {{[0-9]+}}(%rsp), %rdx
; AVX512-NEXT: sbbq {{[0-9]+}}(%rsp), %rcx
; AVX512-NEXT: setb %al
; AVX512-NEXT: kmovd %eax, %k0
+; AVX512-NEXT: subq %r8, %rdi
+; AVX512-NEXT: sbbq %r9, %rsi
+; AVX512-NEXT: setb %al
+; AVX512-NEXT: andl $1, %eax
+; AVX512-NEXT: kmovw %eax, %k1
; AVX512-NEXT: kshiftlw $1, %k0, %k0
-; AVX512-NEXT: andl $1, %r8d
-; AVX512-NEXT: kmovw %r8d, %k1
; AVX512-NEXT: korw %k0, %k1, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}