[llvm] r261724 - AVX512: Add vpmovzxbw/d/q , vpmovzxw/d/q , vpmovzxbdq lowering patterns that support 256bit inputs like AVX patterns ( that are disable in case HasVLX , see SS41I_pmovx_avx2_patterns).

Wed Feb 24 00:15:21 PST 2016

Author: ibreger
Date: Wed Feb 24 02:15:20 2016
New Revision: 261724

URL: http://llvm.org/viewvc/llvm-project?rev=261724&view=rev
Log:
AVX512: Add vpmovzxbw/d/q ,vpmovzxw/d/q ,vpmovzxbdq lowering patterns that support 256bit inputs like AVX patterns ( that are disable in case HasVLX , see SS41I_pmovx_avx2_patterns).

Differential Revision: http://reviews.llvm.org/D17504

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-ext.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=261724&r1=261723&r2=261724&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Wed Feb 24 02:15:20 2016
@@ -6351,6 +6351,14 @@ multiclass avx512_extend_common<bits<8>
   }
 }
 
+// support full register inputs (like SSE paterns)
+multiclass avx512_extend_lowering<SDNode OpNode, X86VectorVTInfo To,
+                                  X86VectorVTInfo From, SubRegIndex SubRegIdx> {
+  def : Pat<(To.VT (OpNode (From.VT From.RC:$src))),
+            (!cast<Instruction>(NAME#To.ZSuffix#"rr") 
+              (EXTRACT_SUBREG From.RC:$src, SubRegIdx))>;
+}
+
 multiclass avx512_extend_BW<bits<8> opc, string OpcodeStr, SDNode OpNode,
           string ExtTy,PatFrag LdFrag = !cast<PatFrag>(ExtTy#"extloadvi8")> {
   let Predicates = [HasVLX, HasBWI] in {
@@ -6360,6 +6368,7 @@ multiclass avx512_extend_BW<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v16i16x_info,
                     v16i8x_info, i128mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v16i16x_info, v32i8x_info, sub_xmm>,
                      EVEX_CD8<8, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasBWI] in {
@@ -6378,6 +6387,7 @@ multiclass avx512_extend_BD<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v8i32x_info,
                    v16i8x_info, i64mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v8i32x_info, v32i8x_info, sub_xmm>,
                          EVEX_CD8<8, CD8VQ>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
@@ -6396,6 +6406,7 @@ multiclass avx512_extend_BQ<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                    v16i8x_info, i32mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v4i64x_info, v32i8x_info, sub_xmm>,
                      EVEX_CD8<8, CD8VO>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
@@ -6414,6 +6425,7 @@ multiclass avx512_extend_WD<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v8i32x_info,
                    v8i16x_info, i128mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v8i32x_info, v16i16x_info, sub_xmm>,
                      EVEX_CD8<16, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
@@ -6432,6 +6444,7 @@ multiclass avx512_extend_WQ<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                    v8i16x_info, i64mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v4i64x_info, v16i16x_info, sub_xmm>,
                      EVEX_CD8<16, CD8VQ>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {
@@ -6451,6 +6464,7 @@ multiclass avx512_extend_DQ<bits<8> opc,
 
     defm Z256:  avx512_extend_common<opc, OpcodeStr, v4i64x_info,
                    v4i32x_info, i128mem, LdFrag, OpNode>,
+                avx512_extend_lowering<OpNode, v4i64x_info, v8i32x_info, sub_xmm>,
                      EVEX_CD8<32, CD8VH>, T8PD, EVEX_V256;
   }
   let Predicates = [HasAVX512] in {

Modified: llvm/trunk/test/CodeGen/X86/avx512-ext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-ext.ll?rev=261724&r1=261723&r2=261724&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-ext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-ext.ll Wed Feb 24 02:15:20 2016
@@ -1810,3 +1810,72 @@ define <64 x i16> @test21(<64 x i16> %x
   ret <64 x i16> %ret
 }
 
+define <16 x i16> @shuffle_zext_16x8_to_16x16(<16 x i8> %a) nounwind readnone {
+; ALL-LABEL: shuffle_zext_16x8_to_16x16:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <32 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16, i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 13, i32 16, i32 14, i32 16, i32 15, i32 16>
+  %2 = bitcast <32 x i8> %1 to <16 x i16>
+  ret <16 x i16> %2
+}
+
+define <16 x i16> @zext_32x8_to_16x16(<32 x i8> %a) {
+; ALL-LABEL: zext_32x8_to_16x16:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 1, i32 32, i32 2, i32 32, i32 3, i32 32, i32 4, i32 32, i32 5, i32 32, i32 6, i32 32, i32 7, i32 32, i32 8, i32 32, i32 9, i32 32, i32 10, i32 32, i32 11, i32 32, i32 12, i32 32, i32 13, i32 32, i32 14, i32 32, i32 15, i32 32>
+  %2 = bitcast <32 x i8> %1 to <16 x i16>
+  ret <16 x i16> %2
+}
+
+define <8 x i32> @zext_32x8_to_8x32(<32 x i8> %a) {
+; ALL-LABEL: zext_32x8_to_8x32:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 4, i32 32, i32 32, i32 32, i32 5, i32 32, i32 32, i32 32, i32 6, i32 32, i32 32, i32 32, i32 7, i32 32, i32 32, i32 32>
+  %2 = bitcast <32 x i8> %1 to <8 x i32>
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @zext_32x8_to_4x64(<32 x i8> %a) {
+; ALL-LABEL: zext_32x8_to_4x64:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <32 x i8> %a, <32 x i8> zeroinitializer, <32 x i32> <i32 0, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 2, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
+  %2 = bitcast <32 x i8> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <8 x i32> @zext_16x16_to_8x32(<16 x i16> %a) {
+; ALL-LABEL: zext_16x16_to_8x32:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 16, i32 2, i32 16, i32 3, i32 16, i32 4, i32 16, i32 5, i32 16, i32 6, i32 16, i32 7, i32 16>
+  %2 = bitcast <16 x i16> %1 to <8 x i32>
+  ret <8 x i32> %2
+}
+
+define <4 x i64> @zext_16x16_to_4x64(<16 x i16> %a) {
+; ALL-LABEL: zext_16x16_to_4x64:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16>
+  %2 = bitcast <16 x i16> %1 to <4 x i64>
+  ret <4 x i64> %2
+}
+
+define <4 x i64> @zext_8x32_to_4x64(<8 x i32> %a) {
+; ALL-LABEL: zext_8x32_to_4x64:
+; ALL:       ## BB#0:
+; ALL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; ALL-NEXT:    retq
+  %1 = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8>
+  %2 = bitcast <8 x i32> %1 to <4 x i64>
+  ret <4 x i64> %2
+}