[llvm] r298984 - [AVX-512] Remove explicit KMOVWrk/KMOVWkr instructions from patterns where we can just use COPY_TO_REGCLASS instead.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Tue Mar 28 23:55:29 PDT 2017
Author: ctopper
Date: Wed Mar 29 01:55:28 2017
New Revision: 298984
URL: http://llvm.org/viewvc/llvm-project?rev=298984&view=rev
Log:
[AVX-512] Remove explicit KMOVWrk/KMOVWkr instructions from patterns where we can just use COPY_TO_REGCLASS instead.
This will result in a KMOVW or KMOVD being emitted during register allocation, and in at least some cases this might allow the register coalescer to remove the copy altogether.
Modified:
llvm/trunk/lib/Target/X86/X86InstrAVX512.td
llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
llvm/trunk/test/CodeGen/X86/avx512-ext.ll
llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll
llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
llvm/trunk/test/CodeGen/X86/pr27591.ll
llvm/trunk/test/CodeGen/X86/pr28173.ll
llvm/trunk/test/CodeGen/X86/pr32241.ll
llvm/trunk/test/CodeGen/X86/pr32256.ll
llvm/trunk/test/CodeGen/X86/xmulo.ll
Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Mar 29 01:55:28 2017
@@ -2279,44 +2279,41 @@ let Predicates = [HasBWI] in {
let Predicates = [HasAVX512] in {
def : Pat<(i1 (trunc (i64 GR64:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
- (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+ (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 GR32:$src))),
- (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+ (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
(COPY_TO_REGCLASS GR32:$src, VK1)>;
def : Pat<(i1 (trunc (i8 GR8:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR8:$src, sub_8bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR8:$src, sub_8bit), (i32 1)), VK1)>;
def : Pat<(i1 (trunc (i16 GR16:$src))),
(COPY_TO_REGCLASS
- (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
- GR16:$src, sub_16bit), (i32 1))),
- VK1)>;
+ (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+ GR16:$src, sub_16bit), (i32 1)), VK1)>;
def : Pat<(i32 (zext VK1:$src)),
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
def : Pat<(i32 (anyext VK1:$src)),
(COPY_TO_REGCLASS VK1:$src, GR32)>;
def : Pat<(i8 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk
- (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
def : Pat<(i8 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
def : Pat<(i64 (zext VK1:$src)),
- (AND64ri8 (SUBREG_TO_REG (i64 0),
- (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+ (SUBREG_TO_REG (i64 0),
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
def : Pat<(i64 (anyext VK1:$src)),
(INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -2324,8 +2321,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(i16 (zext VK1:$src)),
(EXTRACT_SUBREG
- (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
- sub_16bit)>;
+ (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
def : Pat<(i16 (anyext VK1:$src)),
(EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;
Modified: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cmp.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll Wed Mar 29 01:55:28 2017
@@ -158,26 +158,47 @@ B:
}
define i32 @test10(i64 %b, i64 %c, i1 %d) {
-; ALL-LABEL: test10:
-; ALL: ## BB#0:
-; ALL-NEXT: andl $1, %edx
-; ALL-NEXT: kmovw %edx, %k0
-; ALL-NEXT: cmpq %rsi, %rdi
-; ALL-NEXT: sete %al
-; ALL-NEXT: andl $1, %eax
-; ALL-NEXT: kmovw %eax, %k1
-; ALL-NEXT: korw %k1, %k0, %k1
-; ALL-NEXT: kxorw %k1, %k0, %k0
-; ALL-NEXT: kmovw %k0, %eax
-; ALL-NEXT: andl $1, %eax
-; ALL-NEXT: testb %al, %al
-; ALL-NEXT: je LBB8_1
-; ALL-NEXT: ## BB#2: ## %if.end.i
-; ALL-NEXT: movl $6, %eax
-; ALL-NEXT: retq
-; ALL-NEXT: LBB8_1: ## %if.then.i
-; ALL-NEXT: movl $5, %eax
-; ALL-NEXT: retq
+; KNL-LABEL: test10:
+; KNL: ## BB#0:
+; KNL-NEXT: andl $1, %edx
+; KNL-NEXT: kmovw %edx, %k0
+; KNL-NEXT: cmpq %rsi, %rdi
+; KNL-NEXT: sete %al
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: kmovw %eax, %k1
+; KNL-NEXT: korw %k1, %k0, %k1
+; KNL-NEXT: kxorw %k1, %k0, %k0
+; KNL-NEXT: kmovw %k0, %eax
+; KNL-NEXT: andl $1, %eax
+; KNL-NEXT: testb %al, %al
+; KNL-NEXT: je LBB8_1
+; KNL-NEXT: ## BB#2: ## %if.end.i
+; KNL-NEXT: movl $6, %eax
+; KNL-NEXT: retq
+; KNL-NEXT: LBB8_1: ## %if.then.i
+; KNL-NEXT: movl $5, %eax
+; KNL-NEXT: retq
+;
+; SKX-LABEL: test10:
+; SKX: ## BB#0:
+; SKX-NEXT: andl $1, %edx
+; SKX-NEXT: kmovd %edx, %k0
+; SKX-NEXT: cmpq %rsi, %rdi
+; SKX-NEXT: sete %al
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: kmovd %eax, %k1
+; SKX-NEXT: korw %k1, %k0, %k1
+; SKX-NEXT: kxorw %k1, %k0, %k0
+; SKX-NEXT: kmovd %k0, %eax
+; SKX-NEXT: andl $1, %eax
+; SKX-NEXT: testb %al, %al
+; SKX-NEXT: je LBB8_1
+; SKX-NEXT: ## BB#2: ## %if.end.i
+; SKX-NEXT: movl $6, %eax
+; SKX-NEXT: retq
+; SKX-NEXT: LBB8_1: ## %if.then.i
+; SKX-NEXT: movl $5, %eax
+; SKX-NEXT: retq
%cmp8.i = icmp eq i64 %b, %c
%or1 = or i1 %d, %cmp8.i
Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Wed Mar 29 01:55:28 2017
@@ -1448,7 +1448,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
; SKX-LABEL: trunc_i32_to_i1:
; SKX: ## BB#0:
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftrw $1, %k1, %k1
Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Wed Mar 29 01:55:28 2017
@@ -275,7 +275,7 @@ define <16 x i32> @test11(<16 x i32>%a,
; SKX-NEXT: vpcmpltud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $11, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je LBB10_2
@@ -317,7 +317,7 @@ define i64 @test12(<16 x i64>%a, <16 x i
; SKX-NEXT: kunpckbw %k0, %k1, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
@@ -351,7 +351,7 @@ define i16 @test13(i32 %a, i32 %b) {
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: movw $-4, %ax
; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: kshiftrw $1, %k1, %k1
@@ -384,7 +384,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
; SKX-NEXT: vpcmpgtq %zmm0, %zmm1, %k0
; SKX-NEXT: kshiftlb $3, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: testb %al, %al
; SKX-NEXT: cmoveq %rsi, %rdi
@@ -1284,7 +1284,7 @@ define i32 @test_insertelement_v32i1(i32
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpcmpltud %zmm2, %zmm0, %k1
; SKX-NEXT: vpcmpltud %zmm3, %zmm1, %k2
; SKX-NEXT: kunpckwd %k1, %k2, %k1
@@ -1350,7 +1350,7 @@ define i8 @test_iinsertelement_v4i1(i32
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpcmpltud %xmm1, %xmm0, %k1
; SKX-NEXT: vpmovm2d %k1, %xmm0
; SKX-NEXT: vpmovm2d %k0, %xmm1
@@ -1397,7 +1397,7 @@ define i8 @test_iinsertelement_v2i1(i32
; SKX-NEXT: cmpl %esi, %edi
; SKX-NEXT: setb %al
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
; SKX-NEXT: kshiftlw $1, %k1, %k1
; SKX-NEXT: kshiftrw $1, %k1, %k1
@@ -1431,7 +1431,7 @@ define zeroext i8 @test_extractelement_v
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: cmpb $1, %al
; SKX-NEXT: movb $3, %al
@@ -1461,7 +1461,7 @@ define zeroext i8 @extractelement_v2i1_a
; SKX-NEXT: vpcmpnleuq %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $15, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: cmpb $1, %al
; SKX-NEXT: movb $3, %al
@@ -1491,7 +1491,7 @@ define zeroext i8 @test_extractelement_v
; SKX-NEXT: vpcmpnleud %xmm1, %xmm0, %k0
; SKX-NEXT: kshiftlw $12, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: retq
%t1 = icmp ugt <4 x i32> %a, %b
@@ -1516,7 +1516,7 @@ define zeroext i8 @test_extractelement_v
; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
; SKX-NEXT: kshiftld $29, %k0, %k0
; SKX-NEXT: kshiftrd $31, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -1543,7 +1543,7 @@ define zeroext i8 @test_extractelement_v
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: cmpb $1, %al
; SKX-NEXT: movb $3, %al
@@ -1574,7 +1574,7 @@ define zeroext i8 @extractelement_v64i1_
; SKX: ## BB#0:
; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftrq $63, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: cmpb $1, %al
; SKX-NEXT: movb $3, %al
Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Mar 29 01:55:28 2017
@@ -332,7 +332,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
@@ -342,7 +342,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
@@ -378,7 +378,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
@@ -389,7 +389,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
@@ -427,7 +427,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
; SKX-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; SKX-NEXT: kshiftlw $10, %k0, %k0
; SKX-NEXT: kshiftrw $15, %k0, %k0
-; SKX-NEXT: kmovw %k0, %eax
+; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: andl $1, %eax
; SKX-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; SKX-NEXT: vzeroupper
@@ -438,7 +438,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
; AVX512BW-NEXT: vpcmpnleud %zmm1, %zmm0, %k0
; AVX512BW-NEXT: kshiftlw $10, %k0, %k0
; AVX512BW-NEXT: kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT: kmovw %k0, %eax
+; AVX512BW-NEXT: kmovd %k0, %eax
; AVX512BW-NEXT: andl $1, %eax
; AVX512BW-NEXT: ## kill: %AL<def> %AL<kill> %EAX<kill>
; AVX512BW-NEXT: vzeroupper
@@ -1086,7 +1086,7 @@ define <64 x i8> @test17(i64 %x, i32 %y,
; SKX-NEXT: cmpl %edx, %esi
; SKX-NEXT: setg %al
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k1
+; SKX-NEXT: kmovd %eax, %k1
; SKX-NEXT: vpmovm2b %k1, %zmm0
; SKX-NEXT: vpsllq $40, %xmm0, %xmm0
; SKX-NEXT: vpmovm2b %k0, %zmm1
@@ -1104,7 +1104,7 @@ define <64 x i8> @test17(i64 %x, i32 %y,
; AVX512BW-NEXT: cmpl %edx, %esi
; AVX512BW-NEXT: setg %al
; AVX512BW-NEXT: andl $1, %eax
-; AVX512BW-NEXT: kmovw %eax, %k1
+; AVX512BW-NEXT: kmovd %eax, %k1
; AVX512BW-NEXT: vpmovm2b %k1, %zmm0
; AVX512BW-NEXT: vpsllq $40, %xmm0, %xmm0
; AVX512BW-NEXT: vpmovm2b %k0, %zmm1
@@ -1395,7 +1395,7 @@ define void @store_v1i1(<1 x i1> %c , <1
; SKX-LABEL: store_v1i1:
; SKX: ## BB#0:
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: kmovw %edi, %k0
+; SKX-NEXT: kmovd %edi, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
@@ -1405,7 +1405,7 @@ define void @store_v1i1(<1 x i1> %c , <1
; AVX512BW-LABEL: store_v1i1:
; AVX512BW: ## BB#0:
; AVX512BW-NEXT: andl $1, %edi
-; AVX512BW-NEXT: kmovw %edi, %k0
+; AVX512BW-NEXT: kmovd %edi, %k0
; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
@@ -1629,7 +1629,7 @@ define void @f1(i32 %c) {
; SKX-NEXT: movzbl {{.*}}(%rip), %edi
; SKX-NEXT: movl %edi, %eax
; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: kmovw %eax, %k0
+; SKX-NEXT: kmovd %eax, %k0
; SKX-NEXT: kxnorw %k0, %k0, %k1
; SKX-NEXT: kshiftrw $15, %k1, %k1
; SKX-NEXT: kxorw %k1, %k0, %k0
@@ -1642,7 +1642,7 @@ define void @f1(i32 %c) {
; AVX512BW-NEXT: movzbl {{.*}}(%rip), %edi
; AVX512BW-NEXT: movl %edi, %eax
; AVX512BW-NEXT: andl $1, %eax
-; AVX512BW-NEXT: kmovw %eax, %k0
+; AVX512BW-NEXT: kmovd %eax, %k0
; AVX512BW-NEXT: kxnorw %k0, %k0, %k1
; AVX512BW-NEXT: kshiftrw $15, %k1, %k1
; AVX512BW-NEXT: kxorw %k1, %k0, %k0
Modified: llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll Wed Mar 29 01:55:28 2017
@@ -142,7 +142,7 @@ define <4 x float> @test11(<4 x float> %
; SKX: # BB#0: # %entry
; SKX-NEXT: vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm0
; SKX-NEXT: andl $1, %edi
-; SKX-NEXT: kmovw %edi, %k1
+; SKX-NEXT: kmovd %edi, %k1
; SKX-NEXT: vfmadd231ss %xmm1, %xmm1, %xmm0 {%k1}
; SKX-NEXT: retq
;
@@ -183,13 +183,21 @@ entry:
}
define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: test13:
-; CHECK: # BB#0: # %entry
-; CHECK-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k1
-; CHECK-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT: retq
+; SKX-LABEL: test13:
+; SKX: # BB#0: # %entry
+; SKX-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT: andl $1, %edi
+; SKX-NEXT: kmovd %edi, %k1
+; SKX-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; SKX-NEXT: retq
+;
+; KNL-LABEL: test13:
+; KNL: # BB#0: # %entry
+; KNL-NEXT: vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: andl $1, %edi
+; KNL-NEXT: kmovw %edi, %k1
+; KNL-NEXT: vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; KNL-NEXT: retq
entry:
%sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
%0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Wed Mar 29 01:55:28 2017
@@ -1691,12 +1691,12 @@ define <16 x i64> @test_gather_16i64(<16
; KNL_32-LABEL: test_gather_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Lcfi0:
+; KNL_32-NEXT: .Lcfi4:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Lcfi1:
+; KNL_32-NEXT: .Lcfi5:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Lcfi2:
+; KNL_32-NEXT: .Lcfi6:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1814,12 +1814,12 @@ define <16 x double> @test_gather_16f64(
; KNL_32-LABEL: test_gather_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Lcfi3:
+; KNL_32-NEXT: .Lcfi7:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Lcfi4:
+; KNL_32-NEXT: .Lcfi8:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Lcfi5:
+; KNL_32-NEXT: .Lcfi9:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -1936,12 +1936,12 @@ define void @test_scatter_16i64(<16 x i6
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Lcfi6:
+; KNL_32-NEXT: .Lcfi10:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Lcfi7:
+; KNL_32-NEXT: .Lcfi11:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Lcfi8:
+; KNL_32-NEXT: .Lcfi12:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -2058,12 +2058,12 @@ define void @test_scatter_16f64(<16 x do
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Lcfi9:
+; KNL_32-NEXT: .Lcfi13:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Lcfi10:
+; KNL_32-NEXT: .Lcfi14:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Lcfi11:
+; KNL_32-NEXT: .Lcfi15:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-64, %esp
; KNL_32-NEXT: subl $64, %esp
@@ -2139,12 +2139,12 @@ define <4 x i64> @test_pr28312(<4 x i64*
; KNL_32-LABEL: test_pr28312:
; KNL_32: # BB#0:
; KNL_32-NEXT: pushl %ebp
-; KNL_32-NEXT: .Lcfi12:
+; KNL_32-NEXT: .Lcfi16:
; KNL_32-NEXT: .cfi_def_cfa_offset 8
-; KNL_32-NEXT: .Lcfi13:
+; KNL_32-NEXT: .Lcfi17:
; KNL_32-NEXT: .cfi_offset %ebp, -8
; KNL_32-NEXT: movl %esp, %ebp
-; KNL_32-NEXT: .Lcfi14:
+; KNL_32-NEXT: .Lcfi18:
; KNL_32-NEXT: .cfi_def_cfa_register %ebp
; KNL_32-NEXT: andl $-32, %esp
; KNL_32-NEXT: subl $32, %esp
Modified: llvm/trunk/test/CodeGen/X86/pr27591.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr27591.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr27591.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr27591.ll Wed Mar 29 01:55:28 2017
@@ -12,7 +12,7 @@ define void @test1(i32 %x) #0 {
; CHECK-NEXT: # implicit-def: %EDI
; CHECK-NEXT: movb %al, %dil
; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k0
+; CHECK-NEXT: kmovd %edi, %k0
; CHECK-NEXT: kmovd %k0, %edi
; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: andb $1, %al
@@ -35,8 +35,8 @@ define void @test2(i32 %x) #0 {
; CHECK-NEXT: # implicit-def: %EDI
; CHECK-NEXT: movb %al, %dil
; CHECK-NEXT: andl $1, %edi
-; CHECK-NEXT: kmovw %edi, %k0
-; CHECK-NEXT: kmovw %k0, %edi
+; CHECK-NEXT: kmovd %edi, %k0
+; CHECK-NEXT: kmovd %k0, %edi
; CHECK-NEXT: andl $1, %edi
; CHECK-NEXT: movb %dil, %al
; CHECK-NEXT: xorl %edi, %edi
Modified: llvm/trunk/test/CodeGen/X86/pr28173.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr28173.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr28173.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr28173.ll Wed Mar 29 01:55:28 2017
@@ -5,9 +5,6 @@
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; Note that the kmovs should really *not* appear in the output, this is an
-; artifact of the current poor lowering. This is tracked by PR28175.
-
define i64 @foo64(i1 zeroext %i) #0 {
; CHECK-LABEL: foo64:
; CHECK: # BB#0:
@@ -43,25 +40,13 @@ end:
ret i16 %v
}
-; This code is still not optimal
define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 {
-; KNL-LABEL: foo16_1:
-; KNL: # BB#0:
-; KNL-NEXT: kmovw %edi, %k0
-; KNL-NEXT: kmovw %k0, %eax
-; KNL-NEXT: andl $1, %eax
-; KNL-NEXT: orl $2, %eax
-; KNL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
-; KNL-NEXT: retq
-;
-; SKX-LABEL: foo16_1:
-; SKX: # BB#0:
-; SKX-NEXT: kmovd %edi, %k0
-; SKX-NEXT: kmovw %k0, %eax
-; SKX-NEXT: andl $1, %eax
-; SKX-NEXT: orl $2, %eax
-; SKX-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill>
-; SKX-NEXT: retq
+; CHECK-LABEL: foo16_1:
+; CHECK: # BB#0:
+; CHECK-NEXT: andl $1, %edi
+; CHECK-NEXT: orl $2, %edi
+; CHECK-NEXT: movl %edi, %eax
+; CHECK-NEXT: retq
br label %bb
bb:
Modified: llvm/trunk/test/CodeGen/X86/pr32241.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32241.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32241.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32241.ll Wed Mar 29 01:55:28 2017
@@ -41,7 +41,7 @@ define i32 @_Z3foov() {
; CHECK-NEXT: jmp .LBB0_4
; CHECK-NEXT: .LBB0_4: # %lor.end5
; CHECK-NEXT: kmovw {{[0-9]+}}(%esp), %k0 # 2-byte Reload
-; CHECK-NEXT: kmovw %k0, %eax
+; CHECK-NEXT: kmovd %k0, %eax
; CHECK-NEXT: andl $1, %eax
; CHECK-NEXT: movw %ax, %cx
; CHECK-NEXT: movw %cx, {{[0-9]+}}(%esp)
Modified: llvm/trunk/test/CodeGen/X86/pr32256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32256.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32256.ll Wed Mar 29 01:55:28 2017
@@ -16,7 +16,7 @@ define void @_Z1av() {
; CHECK-NEXT: # implicit-def: %EAX
; CHECK-NEXT: movb %cl, %al
; CHECK-NEXT: andl $1, %eax
-; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: kmovd %eax, %k1
; CHECK-NEXT: kmovq %k1, %k2
; CHECK-NEXT: kxnorw %k0, %k0, %k3
; CHECK-NEXT: kshiftrw $15, %k3, %k3
Modified: llvm/trunk/test/CodeGen/X86/xmulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xmulo.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xmulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xmulo.ll Wed Mar 29 01:55:28 2017
@@ -713,10 +713,10 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
; KNL-LABEL: bug27873:
; KNL: ## BB#0:
; KNL-NEXT: andl $1, %esi
-; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: movl $160, %ecx
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: mulq %rcx
+; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: seto %al
; KNL-NEXT: andl $1, %eax
; KNL-NEXT: kmovw %eax, %k1
More information about the llvm-commits mailing list