[llvm] r357155 - [X85][AVX] Add missing vXi16 broadcast fold patterns

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 28 03:25:13 PDT 2019


Author: rksimon
Date: Thu Mar 28 03:25:13 2019
New Revision: 357155

URL: http://llvm.org/viewvc/llvm-project?rev=357155&view=rev
Log:
[X85][AVX] Add missing vXi16 broadcast fold patterns

Now that D59484 has landed its easier to add these.

Added missing AVX512BW v32i16 equivalents while I was at it.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Mar 28 03:25:13 2019
@@ -1394,12 +1394,30 @@ let Predicates = [HasVLX, HasBWI] in {
   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
             (VPBROADCASTWZ256m addr:$src)>;
   def : Pat<(v8i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZ128m addr:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWZ128m addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZ256m addr:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWZ256m addr:$src)>;
 }
+let Predicates = [HasBWI] in {
+  // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably.
+  // This means we'll encounter truncated i32 loads; match that here.
+  def : Pat<(v32i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+  def : Pat<(v32i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+  def : Pat<(v32i16 (X86VBroadcast
+              (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
+            (VPBROADCASTWZm addr:$src)>;
+}
 
 //===----------------------------------------------------------------------===//
 // AVX-512 BROADCAST SUBVECTORS

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Thu Mar 28 03:25:13 2019
@@ -7955,9 +7955,15 @@ let Predicates = [HasAVX2, NoVLX_Or_NoBW
   def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
             (VPBROADCASTWYrm addr:$src)>;
   def : Pat<(v8i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWrm addr:$src)>;
+  def : Pat<(v8i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWrm addr:$src)>;
   def : Pat<(v16i16 (X86VBroadcast
+              (i16 (trunc (i32 (extloadi16 addr:$src)))))),
+            (VPBROADCASTWYrm addr:$src)>;
+  def : Pat<(v16i16 (X86VBroadcast
               (i16 (trunc (i32 (zextloadi16 addr:$src)))))),
             (VPBROADCASTWYrm addr:$src)>;
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v8.ll Thu Mar 28 03:25:13 2019
@@ -2666,18 +2666,10 @@ define <8 x i16> @insert_dup_mem_v8i16_s
 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: insert_dup_mem_v8i16_sext_i16:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movzwl (%rdi), %eax
-; AVX2-NEXT:    vmovd %eax, %xmm0
-; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    movzwl (%rdi), %eax
-; AVX512VL-NEXT:    vpbroadcastw %eax, %xmm0
-; AVX512VL-NEXT:    retq
+; AVX2OR512VL-LABEL: insert_dup_mem_v8i16_sext_i16:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %xmm0
+; AVX2OR512VL-NEXT:    retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v16.ll Thu Mar 28 03:25:13 2019
@@ -4729,18 +4729,10 @@ define <16 x i16> @insert_dup_mem_v16i16
 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: insert_dup_mem_v16i16_sext_i16:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    movzwl (%rdi), %eax
-; AVX2-NEXT:    vmovd %eax, %xmm0
-; AVX2-NEXT:    vpbroadcastw %xmm0, %ymm0
-; AVX2-NEXT:    retq
-;
-; AVX512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
-; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    movzwl (%rdi), %eax
-; AVX512VL-NEXT:    vpbroadcastw %eax, %ymm0
-; AVX512VL-NEXT:    retq
+; AVX2OR512VL-LABEL: insert_dup_mem_v16i16_sext_i16:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpbroadcastw (%rdi), %ymm0
+; AVX2OR512VL-NEXT:    retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
   %tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll?rev=357155&r1=357154&r2=357155&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v32.ll Thu Mar 28 03:25:13 2019
@@ -220,8 +220,7 @@ define <32 x i16> @insert_dup_mem_v32i16
 ;
 ; SKX-LABEL: insert_dup_mem_v32i16_i32:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    movl (%rdi), %eax
-; SKX-NEXT:    vpbroadcastw %eax, %zmm0
+; SKX-NEXT:    vpbroadcastw (%rdi), %zmm0
 ; SKX-NEXT:    retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -233,16 +232,13 @@ define <32 x i16> @insert_dup_mem_v32i16
 define <32 x i16> @insert_dup_mem_v32i16_sext_i16(i16* %ptr) {
 ; KNL-LABEL: insert_dup_mem_v32i16_sext_i16:
 ; KNL:       ## %bb.0:
-; KNL-NEXT:    movzwl (%rdi), %eax
-; KNL-NEXT:    vmovd %eax, %xmm0
-; KNL-NEXT:    vpbroadcastw %xmm0, %ymm0
+; KNL-NEXT:    vpbroadcastw (%rdi), %ymm0
 ; KNL-NEXT:    vmovdqa %ymm0, %ymm1
 ; KNL-NEXT:    retq
 ;
 ; SKX-LABEL: insert_dup_mem_v32i16_sext_i16:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    movzwl (%rdi), %eax
-; SKX-NEXT:    vpbroadcastw %eax, %zmm0
+; SKX-NEXT:    vpbroadcastw (%rdi), %zmm0
 ; SKX-NEXT:    retq
   %tmp = load i16, i16* %ptr, align 2
   %tmp1 = sext i16 %tmp to i32
@@ -261,8 +257,7 @@ define <32 x i16> @insert_dup_elt1_mem_v
 ;
 ; SKX-LABEL: insert_dup_elt1_mem_v32i16_i32:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    movzwl 2(%rdi), %eax
-; SKX-NEXT:    vpbroadcastw %eax, %zmm0
+; SKX-NEXT:    vpbroadcastw 2(%rdi), %zmm0
 ; SKX-NEXT:    retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
@@ -280,8 +275,7 @@ define <32 x i16> @insert_dup_elt3_mem_v
 ;
 ; SKX-LABEL: insert_dup_elt3_mem_v32i16_i32:
 ; SKX:       ## %bb.0:
-; SKX-NEXT:    movzwl 2(%rdi), %eax
-; SKX-NEXT:    vpbroadcastw %eax, %zmm0
+; SKX-NEXT:    vpbroadcastw 2(%rdi), %zmm0
 ; SKX-NEXT:    retq
   %tmp = load i32, i32* %ptr, align 4
   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1




More information about the llvm-commits mailing list