[llvm] r298984 - [AVX-512] Remove explicit KMOVWrk/KMOVWKr instructions from patterns where we can just use COPY_TO_REGCLASS instead.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Tue Mar 28 23:55:29 PDT 2017


Author: ctopper
Date: Wed Mar 29 01:55:28 2017
New Revision: 298984

URL: http://llvm.org/viewvc/llvm-project?rev=298984&view=rev
Log:
[AVX-512] Remove explicit KMOVWrk/KMOVWKr instructions from patterns where we can just use COPY_TO_REGCLASS instead.

This will result in a KMOVW or KMOVD being emitted during register allocation. And in at least some cases this might allow the register coalescer to remove the copy all together.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
    llvm/trunk/test/CodeGen/X86/pr27591.ll
    llvm/trunk/test/CodeGen/X86/pr28173.ll
    llvm/trunk/test/CodeGen/X86/pr32241.ll
    llvm/trunk/test/CodeGen/X86/pr32256.ll
    llvm/trunk/test/CodeGen/X86/xmulo.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Mar 29 01:55:28 2017
@@ -2279,44 +2279,41 @@ let Predicates = [HasBWI] in {
 
 let Predicates = [HasAVX512] in {
   def : Pat<(i1 (trunc (i64 GR64:$src))),
-            (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
-                                        (i32 1))), VK1)>;
+            (COPY_TO_REGCLASS (AND32ri8 (EXTRACT_SUBREG $src, sub_32bit),
+                                        (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i32 GR32:$src))),
-            (COPY_TO_REGCLASS (KMOVWkr (AND32ri8 $src, (i32 1))), VK1)>;
+            (COPY_TO_REGCLASS (AND32ri8 $src, (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i32 (assertzext_i1 GR32:$src)))),
             (COPY_TO_REGCLASS GR32:$src, VK1)>;
 
   def : Pat<(i1 (trunc (i8 GR8:$src))),
        (COPY_TO_REGCLASS
-        (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
-                                          GR8:$src, sub_8bit), (i32 1))),
-       VK1)>;
+        (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                 GR8:$src, sub_8bit), (i32 1)), VK1)>;
 
   def : Pat<(i1 (trunc (i16 GR16:$src))),
        (COPY_TO_REGCLASS
-        (KMOVWkr (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
-                                          GR16:$src, sub_16bit), (i32 1))),
-       VK1)>;
+        (AND32ri8 (INSERT_SUBREG (i32 (IMPLICIT_DEF)),
+                                 GR16:$src, sub_16bit), (i32 1)), VK1)>;
 
   def : Pat<(i32 (zext VK1:$src)),
-            (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1))>;
+            (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1))>;
 
   def : Pat<(i32 (anyext VK1:$src)),
             (COPY_TO_REGCLASS VK1:$src, GR32)>;
 
   def : Pat<(i8 (zext VK1:$src)),
             (EXTRACT_SUBREG
-             (AND32ri8 (KMOVWrk
-                        (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)), sub_8bit)>;
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_8bit)>;
 
   def : Pat<(i8 (anyext VK1:$src)),
             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_8bit)>;
 
   def : Pat<(i64 (zext VK1:$src)),
-            (AND64ri8 (SUBREG_TO_REG (i64 0),
-             (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), sub_32bit), (i64 1))>;
+            (SUBREG_TO_REG (i64 0),
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_32bit)>;
 
   def : Pat<(i64 (anyext VK1:$src)),
             (INSERT_SUBREG (i64 (IMPLICIT_DEF)),
@@ -2324,8 +2321,7 @@ let Predicates = [HasAVX512] in {
 
   def : Pat<(i16 (zext VK1:$src)),
             (EXTRACT_SUBREG
-             (AND32ri8 (KMOVWrk (COPY_TO_REGCLASS VK1:$src, VK16)), (i32 1)),
-              sub_16bit)>;
+             (AND32ri8 (COPY_TO_REGCLASS VK1:$src, GR32), (i32 1)), sub_16bit)>;
 
   def : Pat<(i16 (anyext VK1:$src)),
             (EXTRACT_SUBREG (i32 (COPY_TO_REGCLASS VK1:$src, GR32)), sub_16bit)>;

Modified: llvm/trunk/test/CodeGen/X86/avx512-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-cmp.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-cmp.ll Wed Mar 29 01:55:28 2017
@@ -158,26 +158,47 @@ B:
 }
 
 define i32 @test10(i64 %b, i64 %c, i1 %d) {
-; ALL-LABEL: test10:
-; ALL:       ## BB#0:
-; ALL-NEXT:    andl $1, %edx
-; ALL-NEXT:    kmovw %edx, %k0
-; ALL-NEXT:    cmpq %rsi, %rdi
-; ALL-NEXT:    sete %al
-; ALL-NEXT:    andl $1, %eax
-; ALL-NEXT:    kmovw %eax, %k1
-; ALL-NEXT:    korw %k1, %k0, %k1
-; ALL-NEXT:    kxorw %k1, %k0, %k0
-; ALL-NEXT:    kmovw %k0, %eax
-; ALL-NEXT:    andl $1, %eax
-; ALL-NEXT:    testb %al, %al
-; ALL-NEXT:    je LBB8_1
-; ALL-NEXT:  ## BB#2: ## %if.end.i
-; ALL-NEXT:    movl $6, %eax
-; ALL-NEXT:    retq
-; ALL-NEXT:  LBB8_1: ## %if.then.i
-; ALL-NEXT:    movl $5, %eax
-; ALL-NEXT:    retq
+; KNL-LABEL: test10:
+; KNL:       ## BB#0:
+; KNL-NEXT:    andl $1, %edx
+; KNL-NEXT:    kmovw %edx, %k0
+; KNL-NEXT:    cmpq %rsi, %rdi
+; KNL-NEXT:    sete %al
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    kmovw %eax, %k1
+; KNL-NEXT:    korw %k1, %k0, %k1
+; KNL-NEXT:    kxorw %k1, %k0, %k0
+; KNL-NEXT:    kmovw %k0, %eax
+; KNL-NEXT:    andl $1, %eax
+; KNL-NEXT:    testb %al, %al
+; KNL-NEXT:    je LBB8_1
+; KNL-NEXT:  ## BB#2: ## %if.end.i
+; KNL-NEXT:    movl $6, %eax
+; KNL-NEXT:    retq
+; KNL-NEXT:  LBB8_1: ## %if.then.i
+; KNL-NEXT:    movl $5, %eax
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test10:
+; SKX:       ## BB#0:
+; SKX-NEXT:    andl $1, %edx
+; SKX-NEXT:    kmovd %edx, %k0
+; SKX-NEXT:    cmpq %rsi, %rdi
+; SKX-NEXT:    sete %al
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    kmovd %eax, %k1
+; SKX-NEXT:    korw %k1, %k0, %k1
+; SKX-NEXT:    kxorw %k1, %k0, %k0
+; SKX-NEXT:    kmovd %k0, %eax
+; SKX-NEXT:    andl $1, %eax
+; SKX-NEXT:    testb %al, %al
+; SKX-NEXT:    je LBB8_1
+; SKX-NEXT:  ## BB#2: ## %if.end.i
+; SKX-NEXT:    movl $6, %eax
+; SKX-NEXT:    retq
+; SKX-NEXT:  LBB8_1: ## %if.then.i
+; SKX-NEXT:    movl $5, %eax
+; SKX-NEXT:    retq
 
   %cmp8.i = icmp eq i64 %b, %c
   %or1 = or i1 %d, %cmp8.i

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Wed Mar 29 01:55:28 2017
@@ -1448,7 +1448,7 @@ define i16 @trunc_i32_to_i1(i32 %a) {
 ; SKX-LABEL: trunc_i32_to_i1:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    kmovd %edi, %k0
 ; SKX-NEXT:    movw $-4, %ax
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Wed Mar 29 01:55:28 2017
@@ -275,7 +275,7 @@ define <16 x i32> @test11(<16 x i32>%a,
 ; SKX-NEXT:    vpcmpltud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $11, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    je LBB10_2
@@ -317,7 +317,7 @@ define i64 @test12(<16 x i64>%a, <16 x i
 ; SKX-NEXT:    kunpckbw %k0, %k1, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    cmoveq %rsi, %rdi
@@ -351,7 +351,7 @@ define i16 @test13(i32 %a, i32 %b) {
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    movw $-4, %ax
 ; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1
@@ -384,7 +384,7 @@ define i64 @test14(<8 x i64>%a, <8 x i64
 ; SKX-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
 ; SKX-NEXT:    kshiftlb $3, %k0, %k0
 ; SKX-NEXT:    kshiftrb $7, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    testb %al, %al
 ; SKX-NEXT:    cmoveq %rsi, %rdi
@@ -1284,7 +1284,7 @@ define i32 @test_insertelement_v32i1(i32
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltud %zmm2, %zmm0, %k1
 ; SKX-NEXT:    vpcmpltud %zmm3, %zmm1, %k2
 ; SKX-NEXT:    kunpckwd %k1, %k2, %k1
@@ -1350,7 +1350,7 @@ define i8 @test_iinsertelement_v4i1(i32
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltud %xmm1, %xmm0, %k1
 ; SKX-NEXT:    vpmovm2d %k1, %xmm0
 ; SKX-NEXT:    vpmovm2d %k0, %xmm1
@@ -1397,7 +1397,7 @@ define i8 @test_iinsertelement_v2i1(i32
 ; SKX-NEXT:    cmpl %esi, %edi
 ; SKX-NEXT:    setb %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1
 ; SKX-NEXT:    kshiftlw $1, %k1, %k1
 ; SKX-NEXT:    kshiftrw $1, %k1, %k1
@@ -1431,7 +1431,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1461,7 +1461,7 @@ define zeroext i8 @extractelement_v2i1_a
 ; SKX-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $15, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1491,7 +1491,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX-NEXT:    vpcmpnleud %xmm1, %xmm0, %k0
 ; SKX-NEXT:    kshiftlw $12, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    retq
   %t1 = icmp ugt <4 x i32> %a, %b
@@ -1516,7 +1516,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0
 ; SKX-NEXT:    kshiftld $29, %k0, %k0
 ; SKX-NEXT:    kshiftrd $31, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -1543,7 +1543,7 @@ define zeroext i8 @test_extractelement_v
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftrq $63, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al
@@ -1574,7 +1574,7 @@ define zeroext i8 @extractelement_v64i1_
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftrq $63, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    cmpb $1, %al
 ; SKX-NEXT:    movb $3, %al

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Wed Mar 29 01:55:28 2017
@@ -332,7 +332,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    vzeroupper
 ; SKX-NEXT:    retq
@@ -342,7 +342,7 @@ define i32 @zext_test1(<16 x i32> %a, <1
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    vzeroupper
 ; AVX512BW-NEXT:    retq
@@ -378,7 +378,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; SKX-NEXT:    vzeroupper
@@ -389,7 +389,7 @@ define i16 @zext_test2(<16 x i32> %a, <1
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
 ; AVX512BW-NEXT:    vzeroupper
@@ -427,7 +427,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
 ; SKX-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; SKX-NEXT:    kshiftlw $10, %k0, %k0
 ; SKX-NEXT:    kshiftrw $15, %k0, %k0
-; SKX-NEXT:    kmovw %k0, %eax
+; SKX-NEXT:    kmovd %k0, %eax
 ; SKX-NEXT:    andl $1, %eax
 ; SKX-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; SKX-NEXT:    vzeroupper
@@ -438,7 +438,7 @@ define i8 @zext_test3(<16 x i32> %a, <16
 ; AVX512BW-NEXT:    vpcmpnleud %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT:    kshiftlw $10, %k0, %k0
 ; AVX512BW-NEXT:    kshiftrw $15, %k0, %k0
-; AVX512BW-NEXT:    kmovw %k0, %eax
+; AVX512BW-NEXT:    kmovd %k0, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
 ; AVX512BW-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
 ; AVX512BW-NEXT:    vzeroupper
@@ -1086,7 +1086,7 @@ define <64 x i8> @test17(i64 %x, i32 %y,
 ; SKX-NEXT:    cmpl %edx, %esi
 ; SKX-NEXT:    setg %al
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k1
+; SKX-NEXT:    kmovd %eax, %k1
 ; SKX-NEXT:    vpmovm2b %k1, %zmm0
 ; SKX-NEXT:    vpsllq $40, %xmm0, %xmm0
 ; SKX-NEXT:    vpmovm2b %k0, %zmm1
@@ -1104,7 +1104,7 @@ define <64 x i8> @test17(i64 %x, i32 %y,
 ; AVX512BW-NEXT:    cmpl %edx, %esi
 ; AVX512BW-NEXT:    setg %al
 ; AVX512BW-NEXT:    andl $1, %eax
-; AVX512BW-NEXT:    kmovw %eax, %k1
+; AVX512BW-NEXT:    kmovd %eax, %k1
 ; AVX512BW-NEXT:    vpmovm2b %k1, %zmm0
 ; AVX512BW-NEXT:    vpsllq $40, %xmm0, %xmm0
 ; AVX512BW-NEXT:    vpmovm2b %k0, %zmm1
@@ -1395,7 +1395,7 @@ define void @store_v1i1(<1 x i1> %c , <1
 ; SKX-LABEL: store_v1i1:
 ; SKX:       ## BB#0:
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k0
+; SKX-NEXT:    kmovd %edi, %k0
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
 ; SKX-NEXT:    kxorw %k1, %k0, %k0
@@ -1405,7 +1405,7 @@ define void @store_v1i1(<1 x i1> %c , <1
 ; AVX512BW-LABEL: store_v1i1:
 ; AVX512BW:       ## BB#0:
 ; AVX512BW-NEXT:    andl $1, %edi
-; AVX512BW-NEXT:    kmovw %edi, %k0
+; AVX512BW-NEXT:    kmovd %edi, %k0
 ; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
 ; AVX512BW-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0
@@ -1629,7 +1629,7 @@ define void @f1(i32 %c) {
 ; SKX-NEXT:    movzbl {{.*}}(%rip), %edi
 ; SKX-NEXT:    movl %edi, %eax
 ; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    kmovw %eax, %k0
+; SKX-NEXT:    kmovd %eax, %k0
 ; SKX-NEXT:    kxnorw %k0, %k0, %k1
 ; SKX-NEXT:    kshiftrw $15, %k1, %k1
 ; SKX-NEXT:    kxorw %k1, %k0, %k0
@@ -1642,7 +1642,7 @@ define void @f1(i32 %c) {
 ; AVX512BW-NEXT:    movzbl {{.*}}(%rip), %edi
 ; AVX512BW-NEXT:    movl %edi, %eax
 ; AVX512BW-NEXT:    andl $1, %eax
-; AVX512BW-NEXT:    kmovw %eax, %k0
+; AVX512BW-NEXT:    kmovd %eax, %k0
 ; AVX512BW-NEXT:    kxnorw %k0, %k0, %k1
 ; AVX512BW-NEXT:    kshiftrw $15, %k1, %k1
 ; AVX512BW-NEXT:    kxorw %k1, %k0, %k0

Modified: llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fma-fneg-combine.ll Wed Mar 29 01:55:28 2017
@@ -142,7 +142,7 @@ define <4 x float> @test11(<4 x float> %
 ; SKX:       # BB#0: # %entry
 ; SKX-NEXT:    vxorps {{.*}}(%rip){1to4}, %xmm2, %xmm0
 ; SKX-NEXT:    andl $1, %edi
-; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    kmovd %edi, %k1
 ; SKX-NEXT:    vfmadd231ss %xmm1, %xmm1, %xmm0 {%k1}
 ; SKX-NEXT:    retq
 ;
@@ -183,13 +183,21 @@ entry:
 }
 
 define <2 x double> @test13(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: test13:
-; CHECK:       # BB#0: # %entry
-; CHECK-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
-; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k1
-; CHECK-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
-; CHECK-NEXT:    retq
+; SKX-LABEL: test13:
+; SKX:       # BB#0: # %entry
+; SKX-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; SKX-NEXT:    andl $1, %edi
+; SKX-NEXT:    kmovd %edi, %k1
+; SKX-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; SKX-NEXT:    retq
+;
+; KNL-LABEL: test13:
+; KNL:       # BB#0: # %entry
+; KNL-NEXT:    vxorpd {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT:    andl $1, %edi
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm0 {%k1}
+; KNL-NEXT:    retq
 entry:
   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
   %0 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %sub.i, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Wed Mar 29 01:55:28 2017
@@ -1691,12 +1691,12 @@ define <16 x i64> @test_gather_16i64(<16
 ; KNL_32-LABEL: test_gather_16i64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi0:
+; KNL_32-NEXT:  .Lcfi4:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi1:
+; KNL_32-NEXT:  .Lcfi5:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi2:
+; KNL_32-NEXT:  .Lcfi6:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -1814,12 +1814,12 @@ define <16 x double> @test_gather_16f64(
 ; KNL_32-LABEL: test_gather_16f64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi3:
+; KNL_32-NEXT:  .Lcfi7:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi4:
+; KNL_32-NEXT:  .Lcfi8:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi5:
+; KNL_32-NEXT:  .Lcfi9:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -1936,12 +1936,12 @@ define void @test_scatter_16i64(<16 x i6
 ; KNL_32-LABEL: test_scatter_16i64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi6:
+; KNL_32-NEXT:  .Lcfi10:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi7:
+; KNL_32-NEXT:  .Lcfi11:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi8:
+; KNL_32-NEXT:  .Lcfi12:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -2058,12 +2058,12 @@ define void @test_scatter_16f64(<16 x do
 ; KNL_32-LABEL: test_scatter_16f64:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi9:
+; KNL_32-NEXT:  .Lcfi13:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi10:
+; KNL_32-NEXT:  .Lcfi14:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi11:
+; KNL_32-NEXT:  .Lcfi15:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-64, %esp
 ; KNL_32-NEXT:    subl $64, %esp
@@ -2139,12 +2139,12 @@ define <4 x i64> @test_pr28312(<4 x i64*
 ; KNL_32-LABEL: test_pr28312:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    pushl %ebp
-; KNL_32-NEXT:  .Lcfi12:
+; KNL_32-NEXT:  .Lcfi16:
 ; KNL_32-NEXT:    .cfi_def_cfa_offset 8
-; KNL_32-NEXT:  .Lcfi13:
+; KNL_32-NEXT:  .Lcfi17:
 ; KNL_32-NEXT:    .cfi_offset %ebp, -8
 ; KNL_32-NEXT:    movl %esp, %ebp
-; KNL_32-NEXT:  .Lcfi14:
+; KNL_32-NEXT:  .Lcfi18:
 ; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 ; KNL_32-NEXT:    andl $-32, %esp
 ; KNL_32-NEXT:    subl $32, %esp

Modified: llvm/trunk/test/CodeGen/X86/pr27591.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr27591.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr27591.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr27591.ll Wed Mar 29 01:55:28 2017
@@ -12,7 +12,7 @@ define void @test1(i32 %x) #0 {
 ; CHECK-NEXT:    # implicit-def: %EDI
 ; CHECK-NEXT:    movb %al, %dil
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k0
+; CHECK-NEXT:    kmovd %edi, %k0
 ; CHECK-NEXT:    kmovd %k0, %edi
 ; CHECK-NEXT:    movb %dil, %al
 ; CHECK-NEXT:    andb $1, %al
@@ -35,8 +35,8 @@ define void @test2(i32 %x) #0 {
 ; CHECK-NEXT:    # implicit-def: %EDI
 ; CHECK-NEXT:    movb %al, %dil
 ; CHECK-NEXT:    andl $1, %edi
-; CHECK-NEXT:    kmovw %edi, %k0
-; CHECK-NEXT:    kmovw %k0, %edi
+; CHECK-NEXT:    kmovd %edi, %k0
+; CHECK-NEXT:    kmovd %k0, %edi
 ; CHECK-NEXT:    andl $1, %edi
 ; CHECK-NEXT:    movb %dil, %al
 ; CHECK-NEXT:    xorl %edi, %edi

Modified: llvm/trunk/test/CodeGen/X86/pr28173.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr28173.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr28173.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr28173.ll Wed Mar 29 01:55:28 2017
@@ -5,9 +5,6 @@
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; Note that the kmovs should really *not* appear in the output, this is an
-; artifact of the current poor lowering. This is tracked by PR28175.
-
 define i64 @foo64(i1 zeroext %i) #0 {
 ; CHECK-LABEL: foo64:
 ; CHECK:       # BB#0:
@@ -43,25 +40,13 @@ end:
   ret i16 %v
 }
 
-; This code is still not optimal
 define i16 @foo16_1(i1 zeroext %i, i32 %j) #0 {
-; KNL-LABEL: foo16_1:
-; KNL:       # BB#0:
-; KNL-NEXT:    kmovw %edi, %k0
-; KNL-NEXT:    kmovw %k0, %eax
-; KNL-NEXT:    andl $1, %eax
-; KNL-NEXT:    orl $2, %eax
-; KNL-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; KNL-NEXT:    retq
-;
-; SKX-LABEL: foo16_1:
-; SKX:       # BB#0:
-; SKX-NEXT:    kmovd %edi, %k0
-; SKX-NEXT:    kmovw %k0, %eax
-; SKX-NEXT:    andl $1, %eax
-; SKX-NEXT:    orl $2, %eax
-; SKX-NEXT:    # kill: %AX<def> %AX<kill> %EAX<kill>
-; SKX-NEXT:    retq
+; CHECK-LABEL: foo16_1:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    orl $2, %edi
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    retq
   br label %bb
 
 bb:

Modified: llvm/trunk/test/CodeGen/X86/pr32241.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32241.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32241.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32241.ll Wed Mar 29 01:55:28 2017
@@ -41,7 +41,7 @@ define i32 @_Z3foov() {
 ; CHECK-NEXT:    jmp .LBB0_4
 ; CHECK-NEXT:  .LBB0_4: # %lor.end5
 ; CHECK-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # 2-byte Reload
-; CHECK-NEXT:    kmovw %k0, %eax
+; CHECK-NEXT:    kmovd %k0, %eax
 ; CHECK-NEXT:    andl $1, %eax
 ; CHECK-NEXT:    movw %ax, %cx
 ; CHECK-NEXT:    movw %cx, {{[0-9]+}}(%esp)

Modified: llvm/trunk/test/CodeGen/X86/pr32256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr32256.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr32256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr32256.ll Wed Mar 29 01:55:28 2017
@@ -16,7 +16,7 @@ define void @_Z1av() {
 ; CHECK-NEXT:    # implicit-def: %EAX
 ; CHECK-NEXT:    movb %cl, %al
 ; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    kmovw %eax, %k1
+; CHECK-NEXT:    kmovd %eax, %k1
 ; CHECK-NEXT:    kmovq %k1, %k2
 ; CHECK-NEXT:    kxnorw %k0, %k0, %k3
 ; CHECK-NEXT:    kshiftrw $15, %k3, %k3

Modified: llvm/trunk/test/CodeGen/X86/xmulo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/xmulo.ll?rev=298984&r1=298983&r2=298984&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/xmulo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/xmulo.ll Wed Mar 29 01:55:28 2017
@@ -713,10 +713,10 @@ define i1 @bug27873(i64 %c1, i1 %c2) {
 ; KNL-LABEL: bug27873:
 ; KNL:       ## BB#0:
 ; KNL-NEXT:    andl $1, %esi
-; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    movl $160, %ecx
 ; KNL-NEXT:    movq %rdi, %rax
 ; KNL-NEXT:    mulq %rcx
+; KNL-NEXT:    kmovw %esi, %k0
 ; KNL-NEXT:    seto %al
 ; KNL-NEXT:    andl $1, %eax
 ; KNL-NEXT:    kmovw %eax, %k1




More information about the llvm-commits mailing list