[llvm] r272626 - [AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls since it won't need to preserve the upper 16 bits of the GPR.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Jun 13 20:13:00 PDT 2016


Author: ctopper
Date: Mon Jun 13 22:13:00 2016
New Revision: 272626

URL: http://llvm.org/viewvc/llvm-project?rev=272626&view=rev
Log:
[AVX512] Use MOVZX32 instead of MOVZX16 for loading single v8/v4/v2/v1 masks when KMOVB is not available. This has better behavior with respect to partial register stalls since it won't need to preserve the upper 16 bits of the GPR.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
    llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
    llvm/trunk/test/CodeGen/X86/avx512-select.ll
    llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Mon Jun 13 22:13:00 2016
@@ -2050,18 +2050,18 @@ let Predicates = [HasAVX512, NoDQI] in {
               sub_8bit))>;
 
   def : Pat<(v8i1 (load addr:$src)),
-            (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK8)>;
+            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK8)>;
   def : Pat<(v2i1 (load addr:$src)),
-            (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK2)>;
+            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK2)>;
   def : Pat<(v4i1 (load addr:$src)),
-            (COPY_TO_REGCLASS (MOVZX16rm8 addr:$src), VK4)>;
+            (COPY_TO_REGCLASS (MOVZX32rm8 addr:$src), VK4)>;
 }
 
 let Predicates = [HasAVX512] in {
   def : Pat<(store (i16 (bitconvert (v16i1 VK16:$src))), addr:$dst),
             (KMOVWmk addr:$dst, VK16:$src)>;
   def : Pat<(i1 (load addr:$src)),
-            (COPY_TO_REGCLASS (AND16ri (MOVZX16rm8 addr:$src), (i16 1)), VK1)>;
+            (COPY_TO_REGCLASS (AND32ri (MOVZX32rm8 addr:$src), (i32 1)), VK1)>;
   def : Pat<(v16i1 (bitconvert (i16 (load addr:$src)))),
             (KMOVWkm addr:$src)>;
 }

Modified: llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-insert-extract.ll Mon Jun 13 22:13:00 2016
@@ -200,7 +200,7 @@ define i16 @test15(i1 *%addr) {
 }
 
 ;CHECK-LABEL: test16
-;CHECK: movzbw (%rdi), %ax
+;CHECK: movzbl (%rdi), %eax
 ;CHECK: kmovw
 ;CHECK: kshiftlw        $10
 ;CHECK: korw
@@ -214,8 +214,8 @@ define i16 @test16(i1 *%addr, i16 %a) {
 }
 
 ;CHECK-LABEL: test17
-;KNL: movzbw (%rdi), %ax
-;KNL: andw $1, %ax
+;KNL: movzbl (%rdi), %eax
+;KNL: andl $1, %eax
 ;KNL: kshiftlw $4
 ;KNL: korw
 ;SKX: kshiftlb $4

Modified: llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-mask-op.ll Mon Jun 13 22:13:00 2016
@@ -88,7 +88,7 @@ define void @mask16_mem(i16* %ptr) {
 define void @mask8_mem(i8* %ptr) {
 ; KNL-LABEL: mask8_mem:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    movzbw (%rdi), %ax
+; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k0
 ; KNL-NEXT:    knotw %k0, %k0
 ; KNL-NEXT:    kmovw %k0, %eax
@@ -1341,7 +1341,7 @@ End:
 define <8 x i64> @load_8i1(<8 x i1>* %a) {
 ; KNL-LABEL: load_8i1:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    movzbw (%rdi), %ax
+; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
@@ -1376,7 +1376,7 @@ define <16 x i32> @load_16i1(<16 x i1>*
 define <2 x i16> @load_2i1(<2 x i1>* %a) {
 ; KNL-LABEL: load_2i1:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    movzbw (%rdi), %ax
+; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; KNL-NEXT:    retq
@@ -1394,7 +1394,7 @@ define <2 x i16> @load_2i1(<2 x i1>* %a)
 define <4 x i16> @load_4i1(<4 x i1>* %a) {
 ; KNL-LABEL: load_4i1:
 ; KNL:       ## BB#0:
-; KNL-NEXT:    movzbw (%rdi), %ax
+; KNL-NEXT:    movzbl (%rdi), %eax
 ; KNL-NEXT:    kmovw %eax, %k1
 ; KNL-NEXT:    vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
 ; KNL-NEXT:    vpmovqd %zmm0, %ymm0

Modified: llvm/trunk/test/CodeGen/X86/avx512-select.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-select.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-select.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-select.ll Mon Jun 13 22:13:00 2016
@@ -84,9 +84,9 @@ define i8 @select05(i8 %a.0, i8 %m) {
 define i8 @select05_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
 ; CHECK-LABEL: select05_mem:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movzbw (%rsi), %ax
+; CHECK-NEXT:    movzbl (%rsi), %eax
 ; CHECK-NEXT:    kmovw %eax, %k0
-; CHECK-NEXT:    movzbw (%rdi), %ax
+; CHECK-NEXT:    movzbl (%rdi), %eax
 ; CHECK-NEXT:    kmovw %eax, %k1
 ; CHECK-NEXT:    korw %k1, %k0, %k0
 ; CHECK-NEXT:    kmovw %k0, %eax
@@ -114,9 +114,9 @@ define i8 @select06(i8 %a.0, i8 %m) {
 define i8 @select06_mem(<8 x i1>* %a.0, <8 x i1>* %m) {
 ; CHECK-LABEL: select06_mem:
 ; CHECK:       ## BB#0:
-; CHECK-NEXT:    movzbw (%rsi), %ax
+; CHECK-NEXT:    movzbl (%rsi), %eax
 ; CHECK-NEXT:    kmovw %eax, %k0
-; CHECK-NEXT:    movzbw (%rdi), %ax
+; CHECK-NEXT:    movzbl (%rdi), %eax
 ; CHECK-NEXT:    kmovw %eax, %k1
 ; CHECK-NEXT:    kandw %k1, %k0, %k0
 ; CHECK-NEXT:    kmovw %k0, %eax

Modified: llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll?rev=272626&r1=272625&r2=272626&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll (original)
+++ llvm/trunk/test/CodeGen/X86/masked_gather_scatter.ll Mon Jun 13 22:13:00 2016
@@ -291,7 +291,7 @@ define <8 x i32> @test7(i32* %base, <8 x
 ; KNL_32-LABEL: test7:
 ; KNL_32:       # BB#0:
 ; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT:    movzbw {{[0-9]+}}(%esp), %cx
+; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; KNL_32-NEXT:    kmovw %ecx, %k1
 ; KNL_32-NEXT:    vpmovsxdq %ymm0, %zmm0
 ; KNL_32-NEXT:    kmovw %k1, %k2




More information about the llvm-commits mailing list