[llvm] r320268 - [X86] Use KMOV instructions to zero upper bits of vectors when possible.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sat Dec 9 15:10:59 PST 2017


Author: ctopper
Date: Sat Dec  9 15:10:59 2017
New Revision: 320268

URL: http://llvm.org/viewvc/llvm-project?rev=320268&view=rev
Log:
[X86] Use KMOV instructions to zero upper bits of vectors when possible.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
    llvm/trunk/test/CodeGen/X86/avx512bw-mov.ll
    llvm/trunk/test/CodeGen/X86/pr34605.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td?rev=320268&r1=320267&r2=320268&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrVecCompiler.td Sat Dec  9 15:10:59 2017
@@ -495,13 +495,19 @@ let Predicates = [HasBWI, HasVLX] in {
 
 // If the bits are not zero we have to fall back to explicitly zeroing by
 // using shifts.
-let Predicates = [HasAVX512] in {
+let Predicates = [HasAVX512, NoDQI] in {
   def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
                                      (v8i1 VK8:$mask), (iPTR 0))),
             (KSHIFTRWri (KSHIFTLWri (COPY_TO_REGCLASS VK8:$mask, VK16),
                                     (i8 8)), (i8 8))>;
 }
 
+let Predicates = [HasDQI] in {
+  def : Pat<(v16i1 (insert_subvector (v16i1 immAllZerosV),
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK16)>;
+}
+
 let Predicates = [HasVLX, HasDQI] in {
   def : Pat<(v8i1 (insert_subvector (v8i1 immAllZerosV),
                                     (v2i1 VK2:$mask), (iPTR 0))),
@@ -526,26 +532,37 @@ let Predicates = [HasVLX] in {
 
 let Predicates = [HasBWI] in {
   def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v16i1 VK16:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK32)>;
+
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v16i1 VK16:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVWkk VK16:$mask), VK64)>;
+  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
+                                     (v32i1 VK32:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVDkk VK32:$mask), VK64)>;
+}
+
+let Predicates = [HasBWI, NoDQI] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
                                      (v8i1 VK8:$mask), (iPTR 0))),
             (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK8:$mask, VK32),
                                     (i8 24)), (i8 24))>;
-  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
-                                     (v16i1 VK16:$mask), (iPTR 0))),
-            (KSHIFTRDri (KSHIFTLDri (COPY_TO_REGCLASS VK16:$mask, VK32),
-                                    (i8 16)), (i8 16))>;
 
   def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
                                      (v8i1 VK8:$mask), (iPTR 0))),
             (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK8:$mask, VK64),
                                     (i8 56)), (i8 56))>;
+}
+
+let Predicates = [HasBWI, HasDQI] in {
+  def : Pat<(v32i1 (insert_subvector (v32i1 immAllZerosV),
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK32)>;
+
   def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
-                                     (v16i1 VK16:$mask), (iPTR 0))),
-            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK16:$mask, VK64),
-                                    (i8 48)), (i8 48))>;
-  def : Pat<(v64i1 (insert_subvector (v64i1 immAllZerosV),
-                                     (v32i1 VK32:$mask), (iPTR 0))),
-            (KSHIFTRQri (KSHIFTLQri (COPY_TO_REGCLASS VK32:$mask, VK64),
-                                    (i8 32)), (i8 32))>;
+                                     (v8i1 VK8:$mask), (iPTR 0))),
+            (COPY_TO_REGCLASS (KMOVBkk VK8:$mask), VK64)>;
 }
 
 let Predicates = [HasBWI, HasVLX] in {

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-mov.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-mov.ll?rev=320268&r1=320267&r2=320268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-mov.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-mov.ll Sat Dec  9 15:10:59 2017
@@ -102,8 +102,7 @@ define <16 x i8> @test_mask_load_16xi8(<
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftlq $48, %k0, %k0
-; CHECK-NEXT:    kshiftrq $48, %k0, %k1
+; CHECK-NEXT:    kmovw %k0, %k1
 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
 ; CHECK-NEXT:    ## kill: def %xmm0 killed %xmm0 killed %zmm0
 ; CHECK-NEXT:    retq
@@ -117,8 +116,7 @@ define <32 x i8> @test_mask_load_32xi8(<
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftlq $32, %k0, %k0
-; CHECK-NEXT:    kshiftrq $32, %k0, %k1
+; CHECK-NEXT:    kmovd %k0, %k1
 ; CHECK-NEXT:    vmovdqu8 (%rdi), %zmm0 {%k1} {z}
 ; CHECK-NEXT:    ## kill: def %ymm0 killed %ymm0 killed %zmm0
 ; CHECK-NEXT:    retq
@@ -147,8 +145,7 @@ define <16 x i16> @test_mask_load_16xi16
 ; CHECK:       ## %bb.0:
 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftld $16, %k0, %k0
-; CHECK-NEXT:    kshiftrd $16, %k0, %k1
+; CHECK-NEXT:    kmovw %k0, %k1
 ; CHECK-NEXT:    vmovdqu16 (%rdi), %zmm0 {%k1} {z}
 ; CHECK-NEXT:    ## kill: def %ymm0 killed %ymm0 killed %zmm0
 ; CHECK-NEXT:    retq
@@ -163,8 +160,7 @@ define void @test_mask_store_16xi8(<16 x
 ; CHECK-NEXT:    ## kill: def %xmm1 killed %xmm1 def %zmm1
 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftlq $48, %k0, %k0
-; CHECK-NEXT:    kshiftrq $48, %k0, %k1
+; CHECK-NEXT:    kmovw %k0, %k1
 ; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.masked.store.v16i8(<16 x i8> %val, <16 x i8>* %addr, i32 4, <16 x i1>%mask)
@@ -178,8 +174,7 @@ define void @test_mask_store_32xi8(<32 x
 ; CHECK-NEXT:    ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; CHECK-NEXT:    vpsllw $7, %ymm0, %ymm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftlq $32, %k0, %k0
-; CHECK-NEXT:    kshiftrq $32, %k0, %k1
+; CHECK-NEXT:    kmovd %k0, %k1
 ; CHECK-NEXT:    vmovdqu8 %zmm1, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.masked.store.v32i8(<32 x i8> %val, <32 x i8>* %addr, i32 4, <32 x i1>%mask)
@@ -208,8 +203,7 @@ define void @test_mask_store_16xi16(<16
 ; CHECK-NEXT:    ## kill: def %ymm1 killed %ymm1 def %zmm1
 ; CHECK-NEXT:    vpsllw $7, %xmm0, %xmm0
 ; CHECK-NEXT:    vpmovb2m %zmm0, %k0
-; CHECK-NEXT:    kshiftld $16, %k0, %k0
-; CHECK-NEXT:    kshiftrd $16, %k0, %k1
+; CHECK-NEXT:    kmovw %k0, %k1
 ; CHECK-NEXT:    vmovdqu16 %zmm1, (%rdi) {%k1}
 ; CHECK-NEXT:    retq
   call void @llvm.masked.store.v16i16(<16 x i16> %val, <16 x i16>* %addr, i32 4, <16 x i1>%mask)

Modified: llvm/trunk/test/CodeGen/X86/pr34605.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/pr34605.ll?rev=320268&r1=320267&r2=320268&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/pr34605.ll (original)
+++ llvm/trunk/test/CodeGen/X86/pr34605.ll Sat Dec  9 15:10:59 2017
@@ -15,8 +15,7 @@ define void @pr34605(i8* nocapture %s, i
 ; CHECK-NEXT:    kunpckdq %k0, %k1, %k0
 ; CHECK-NEXT:    movl $1, %ecx
 ; CHECK-NEXT:    kmovd %ecx, %k1
-; CHECK-NEXT:    kshiftlq $32, %k1, %k1
-; CHECK-NEXT:    kshiftrq $32, %k1, %k1
+; CHECK-NEXT:    kmovd %k1, %k1
 ; CHECK-NEXT:    kandq %k1, %k0, %k1
 ; CHECK-NEXT:    vmovdqu8 {{\.LCPI.*}}, %zmm0 {%k1} {z}
 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1




More information about the llvm-commits mailing list