[PATCH] D35897: [X86][AVX512] Add masked MOVS[S|D] patterns

Guy Blank via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Sun Jul 30 01:20:18 PDT 2017


guyblank updated this revision to Diff 108822.
guyblank added a comment.

Reformatted the patch so that the added lines fit within 80 columns.


https://reviews.llvm.org/D35897

Files:
  lib/Target/X86/X86InstrAVX512.td
  test/CodeGen/X86/avx512-load-store.ll
  test/CodeGen/X86/avx512-select.ll


Index: test/CodeGen/X86/avx512-select.ll
===================================================================
--- test/CodeGen/X86/avx512-select.ll
+++ test/CodeGen/X86/avx512-select.ll
@@ -289,7 +289,6 @@
 ;
 ; X64-LABEL: pr30561_f64:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
@@ -309,7 +308,6 @@
 ;
 ; X64-LABEL: pr30561_f32:
 ; X64:       # BB#0:
-; X64-NEXT:    andb $1, %dil
 ; X64-NEXT:    kmovw %edi, %k1
 ; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1}
 ; X64-NEXT:    retq
Index: test/CodeGen/X86/avx512-load-store.ll
===================================================================
--- test/CodeGen/X86/avx512-load-store.ll
+++ test/CodeGen/X86/avx512-load-store.ll
@@ -12,7 +12,6 @@
 ; CHECK32-LABEL: test_mm_mask_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovss %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovss {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
@@ -37,7 +36,6 @@
 ; CHECK32-LABEL: test_mm_maskz_move_ss:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorps %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovss %xmm1, %xmm0, %xmm2 {%k1}
@@ -62,7 +60,6 @@
 ; CHECK32-LABEL: test_mm_mask_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vmovsd %xmm2, %xmm0, %xmm0 {%k1}
 ; CHECK32-NEXT:    vmovsd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
@@ -87,7 +84,6 @@
 ; CHECK32-LABEL: test_mm_maskz_move_sd:
 ; CHECK32:       # BB#0: # %entry
 ; CHECK32-NEXT:    movb {{[0-9]+}}(%esp), %al
-; CHECK32-NEXT:    andb $1, %al
 ; CHECK32-NEXT:    kmovw %eax, %k1
 ; CHECK32-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
 ; CHECK32-NEXT:    vmovsd %xmm1, %xmm0, %xmm2 {%k1}
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -4002,10 +4002,26 @@
 defm : avx512_load_scalar_lowering_subreg<"VMOVSDZ", avx512vl_f64_info,
                    (v8i1 (bitconvert (i8 (and GR8:$mask, (i8 1))))), GR8, sub_8bit>;
 
+def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+                           (f32 FR32X:$src1), (f32 FR32X:$src2))),
+          (COPY_TO_REGCLASS
+            (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
+                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+                          GR8:$mask, sub_8bit)), VK1WM),
+            (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
            VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
 
+def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
+                           (f64 FR64X:$src1), (f64 FR64X:$src2))),
+          (COPY_TO_REGCLASS
+            (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
+                        (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF),
+                          GR8:$mask, sub_8bit)), VK1WM),
+            (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
            VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D35897.108822.patch
Type: text/x-patch
Size: 3759 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170730/dfedc1a2/attachment.bin>


More information about the llvm-commits mailing list