[llvm] r311308 - [AVX-512] Don't change which instructions we use for unmasked subvector broadcasts when AVX512DQ is enabled.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Sun Aug 20 22:29:02 PDT 2017


Author: ctopper
Date: Sun Aug 20 22:29:02 2017
New Revision: 311308

URL: http://llvm.org/viewvc/llvm-project?rev=311308&view=rev
Log:
[AVX-512] Don't change which instructions we use for unmasked subvector broadcasts when AVX512DQ is enabled.

There's no functional difference between the AVX512DQ instructions if we're not masking.

This change unifies test checks and removes extra isel entries. Something similar was done recently for subvector inserts and extracts.

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll
    llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti256.ll
    llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=311308&r1=311307&r2=311308&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Sun Aug 20 22:29:02 2017
@@ -1115,6 +1115,19 @@ multiclass avx512_subvec_broadcast_rm<bi
                             AVX5128IBase, EVEX;
 }
 
+// This should be used for the AVX512DQ broadcast instructions. It disables
+// the unmasked patterns so that we only use the DQ instructions when masking
+// is requested.
+multiclass avx512_subvec_broadcast_rm_dq<bits<8> opc, string OpcodeStr,
+                          X86VectorVTInfo _Dst, X86VectorVTInfo _Src> {
+  defm rm : AVX512_maskable_split<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst),
+                           (ins _Src.MemOp:$src), OpcodeStr, "$src", "$src",
+                           (null_frag),
+                           (_Dst.VT (X86SubVBroadcast
+                             (_Src.VT (bitconvert (_Src.LdFrag addr:$src)))))>,
+                            AVX5128IBase, EVEX;
+}
+
 let Predicates = [HasAVX512] in {
   // 32-bit targets will fail to load a i64 directly but can use ZEXT_LOAD.
   def : Pat<(v8i64 (X86VBroadcast (v8i64 (X86vzload addr:$src)))),
@@ -1159,6 +1172,10 @@ defm VBROADCASTF64X4 : avx512_subvec_bro
                        EVEX_V512, EVEX_CD8<64, CD8VT4>;
 
 let Predicates = [HasAVX512] in {
+def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
+          (VBROADCASTF64X4rm addr:$src)>;
+def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
+          (VBROADCASTI64X4rm addr:$src)>;
 def : Pat<(v32i16 (X86SubVBroadcast (bc_v16i16 (loadv4i64 addr:$src)))),
           (VBROADCASTI64X4rm addr:$src)>;
 def : Pat<(v64i8 (X86SubVBroadcast (bc_v32i8 (loadv4i64 addr:$src)))),
@@ -1169,9 +1186,15 @@ def : Pat<(v64i8 (X86SubVBroadcast (bc_v
 def : Pat<(v8f64 (X86SubVBroadcast (v4f64 VR256X:$src))),
           (VINSERTF64x4Zrr (INSERT_SUBREG (v8f64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v4f64 VR256X:$src), 1)>;
+def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
+          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v8f32 VR256X:$src), 1)>;
 def : Pat<(v8i64 (X86SubVBroadcast (v4i64 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v8i64 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v4i64 VR256X:$src), 1)>;
+def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
+          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
+                           (v8i32 VR256X:$src), 1)>;
 def : Pat<(v32i16 (X86SubVBroadcast (v16i16 VR256X:$src))),
           (VINSERTI64x4Zrr (INSERT_SUBREG (v32i16 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v16i16 VR256X:$src), 1)>;
@@ -1179,6 +1202,10 @@ def : Pat<(v64i8 (X86SubVBroadcast (v32i
           (VINSERTI64x4Zrr (INSERT_SUBREG (v64i8 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
                            (v32i8 VR256X:$src), 1)>;
 
+def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+          (VBROADCASTF32X4rm addr:$src)>;
+def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+          (VBROADCASTI32X4rm addr:$src)>;
 def : Pat<(v32i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
           (VBROADCASTI32X4rm addr:$src)>;
 def : Pat<(v64i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
@@ -1193,6 +1220,10 @@ defm VBROADCASTF32X4Z256 : avx512_subvec
                            v8f32x_info, v4f32x_info>,
                            EVEX_V256, EVEX_CD8<32, CD8VT4>;
 
+def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
+          (VBROADCASTF32X4Z256rm addr:$src)>;
+def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
+          (VBROADCASTI32X4Z256rm addr:$src)>;
 def : Pat<(v16i16 (X86SubVBroadcast (bc_v8i16 (loadv2i64 addr:$src)))),
           (VBROADCASTI32X4Z256rm addr:$src)>;
 def : Pat<(v32i8 (X86SubVBroadcast (bc_v16i8 (loadv2i64 addr:$src)))),
@@ -1200,9 +1231,15 @@ def : Pat<(v32i8 (X86SubVBroadcast (bc_v
 
 // Provide fallback in case the load node that is used in the patterns above
 // is used by additional users, which prevents the pattern selection.
+def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
+          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2f64 VR128X:$src), 1)>;
 def : Pat<(v8f32 (X86SubVBroadcast (v4f32 VR128X:$src))),
           (VINSERTF32x4Z256rr (INSERT_SUBREG (v8f32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                               (v4f32 VR128X:$src), 1)>;
+def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
+          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
+                              (v2i64 VR128X:$src), 1)>;
 def : Pat<(v8i32 (X86SubVBroadcast (v4i32 VR128X:$src))),
           (VINSERTI32x4Z256rr (INSERT_SUBREG (v8i32 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
                               (v4i32 VR128X:$src), 1)>;
@@ -1215,82 +1252,27 @@ def : Pat<(v32i8 (X86SubVBroadcast (v16i
 }
 
 let Predicates = [HasVLX, HasDQI] in {
-defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
+defm VBROADCASTI64X2Z128 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                            v4i64x_info, v2i64x_info>, VEX_W,
                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
+defm VBROADCASTF64X2Z128 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                            v4f64x_info, v2f64x_info>, VEX_W,
                            EVEX_V256, EVEX_CD8<64, CD8VT2>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
-          (VINSERTF64x2Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-                              (v2f64 VR128X:$src), 1)>;
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
-          (VINSERTI64x2Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-                              (v2i64 VR128X:$src), 1)>;
-}
-
-let Predicates = [HasVLX, NoDQI] in {
-def : Pat<(v4f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
-          (VBROADCASTF32X4Z256rm addr:$src)>;
-def : Pat<(v4i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
-          (VBROADCASTI32X4Z256rm addr:$src)>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v4f64 (X86SubVBroadcast (v2f64 VR128X:$src))),
-          (VINSERTF32x4Z256rr (INSERT_SUBREG (v4f64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-                              (v2f64 VR128X:$src), 1)>;
-def : Pat<(v4i64 (X86SubVBroadcast (v2i64 VR128X:$src))),
-          (VINSERTI32x4Z256rr (INSERT_SUBREG (v4i64 (IMPLICIT_DEF)), VR128X:$src, sub_xmm),
-                              (v2i64 VR128X:$src), 1)>;
-}
-
-let Predicates = [HasAVX512, NoDQI] in {
-def : Pat<(v8f64 (X86SubVBroadcast (loadv2f64 addr:$src))),
-          (VBROADCASTF32X4rm addr:$src)>;
-def : Pat<(v8i64 (X86SubVBroadcast (loadv2i64 addr:$src))),
-          (VBROADCASTI32X4rm addr:$src)>;
-
-def : Pat<(v16f32 (X86SubVBroadcast (loadv8f32 addr:$src))),
-          (VBROADCASTF64X4rm addr:$src)>;
-def : Pat<(v16i32 (X86SubVBroadcast (bc_v8i32 (loadv4i64 addr:$src)))),
-          (VBROADCASTI64X4rm addr:$src)>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
-          (VINSERTF64x4Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                           (v8f32 VR256X:$src), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
-          (VINSERTI64x4Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                           (v8i32 VR256X:$src), 1)>;
 }
 
 let Predicates = [HasDQI] in {
-defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm<0x5a, "vbroadcasti64x2",
+defm VBROADCASTI64X2 : avx512_subvec_broadcast_rm_dq<0x5a, "vbroadcasti64x2",
                        v8i64_info, v2i64x_info>, VEX_W,
                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm<0x5b, "vbroadcasti32x8",
+defm VBROADCASTI32X8 : avx512_subvec_broadcast_rm_dq<0x5b, "vbroadcasti32x8",
                        v16i32_info, v8i32x_info>,
                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
-defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm<0x1a, "vbroadcastf64x2",
+defm VBROADCASTF64X2 : avx512_subvec_broadcast_rm_dq<0x1a, "vbroadcastf64x2",
                        v8f64_info, v2f64x_info>, VEX_W,
                        EVEX_V512, EVEX_CD8<64, CD8VT2>;
-defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8",
+defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm_dq<0x1b, "vbroadcastf32x8",
                        v16f32_info, v8f32x_info>,
                        EVEX_V512, EVEX_CD8<32, CD8VT8>;
-
-// Provide fallback in case the load node that is used in the patterns above
-// is used by additional users, which prevents the pattern selection.
-def : Pat<(v16f32 (X86SubVBroadcast (v8f32 VR256X:$src))),
-          (VINSERTF32x8Zrr (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                           (v8f32 VR256X:$src), 1)>;
-def : Pat<(v16i32 (X86SubVBroadcast (v8i32 VR256X:$src))),
-          (VINSERTI32x8Zrr (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256X:$src, sub_ymm),
-                           (v8i32 VR256X:$src), 1)>;
 }
 
 multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr,

Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll?rev=311308&r1=311307&r2=311308&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti128.ll Sun Aug 20 22:29:02 2017
@@ -84,23 +84,11 @@ define <32 x i8> @test_broadcast_16i8_32
 ;
 
 define <8 x double> @test_broadcast_2f64_8f64(<2 x double> *%p) nounwind {
-; X64-AVX512VL-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512VL-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512VL-NEXT:    retq
-;
-; X64-AVX512BWVL-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512BWVL:       ## BB#0:
-; X64-AVX512BWVL-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512BWVL-NEXT:    retq
-;
-; X64-AVX512DQVL-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512DQVL:       ## BB#0:
-; X64-AVX512DQVL-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X64-AVX512DQVL-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512DQVL-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_2f64_8f64:
+; X64-AVX512:       ## BB#0:
+; X64-AVX512-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    vaddpd {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double> *%p
  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %3 = fadd <8 x double> %2, <double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0, double 8.0>
@@ -108,23 +96,11 @@ define <8 x double> @test_broadcast_2f64
 }
 
 define <8 x i64> @test_broadcast_2i64_8i64(<2 x i64> *%p) nounwind {
-; X64-AVX512VL-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512VL-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512VL-NEXT:    retq
-;
-; X64-AVX512BWVL-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512BWVL:       ## BB#0:
-; X64-AVX512BWVL-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512BWVL-NEXT:    retq
-;
-; X64-AVX512DQVL-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512DQVL:       ## BB#0:
-; X64-AVX512DQVL-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X64-AVX512DQVL-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512DQVL-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_2i64_8i64:
+; X64-AVX512:       ## BB#0:
+; X64-AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    vpaddq {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64> *%p
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  %3 = add <8 x i64> %2, <i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8>

Modified: llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti256.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti256.ll?rev=311308&r1=311307&r2=311308&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti256.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vbroadcasti256.ll Sun Aug 20 22:29:02 2017
@@ -28,23 +28,11 @@ define <8 x i64> @test_broadcast_4i64_8i
 }
 
 define <16 x float> @test_broadcast_8f32_16f32(<8 x float> *%p) nounwind {
-; X64-AVX512VL-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512VL-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512VL-NEXT:    retq
-;
-; X64-AVX512BWVL-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512BWVL:       ## BB#0:
-; X64-AVX512BWVL-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512BWVL-NEXT:    retq
-;
-; X64-AVX512DQVL-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512DQVL:       ## BB#0:
-; X64-AVX512DQVL-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X64-AVX512DQVL-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512DQVL-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_8f32_16f32:
+; X64-AVX512:       ## BB#0:
+; X64-AVX512-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    vaddps {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float> *%p
  %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = fadd <16 x float> %2, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
@@ -52,23 +40,11 @@ define <16 x float> @test_broadcast_8f32
 }
 
 define <16 x i32> @test_broadcast_8i32_16i32(<8 x i32> *%p) nounwind {
-; X64-AVX512VL-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512VL:       ## BB#0:
-; X64-AVX512VL-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512VL-NEXT:    retq
-;
-; X64-AVX512BWVL-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512BWVL:       ## BB#0:
-; X64-AVX512BWVL-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BWVL-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512BWVL-NEXT:    retq
-;
-; X64-AVX512DQVL-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512DQVL:       ## BB#0:
-; X64-AVX512DQVL-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X64-AVX512DQVL-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
-; X64-AVX512DQVL-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_8i32_16i32:
+; X64-AVX512:       ## BB#0:
+; X64-AVX512-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    vpaddd {{.*}}(%rip), %zmm0, %zmm0
+; X64-AVX512-NEXT:    retq
  %1 = load <8 x i32>, <8 x i32> *%p
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = add <16 x i32> %2, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>

Modified: llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll?rev=311308&r1=311307&r2=311308&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/subvector-broadcast.ll Sun Aug 20 22:29:02 2017
@@ -38,23 +38,11 @@ define <8 x double> @test_broadcast_2f64
 ; X32-AVX-NEXT:    vmovdqa %ymm0, %ymm1
 ; X32-AVX-NEXT:    retl
 ;
-; X32-AVX512F-LABEL: test_broadcast_2f64_8f64:
-; X32-AVX512F:       # BB#0:
-; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT:    retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_2f64_8f64:
-; X32-AVX512BW:       # BB#0:
-; X32-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512BW-NEXT:    retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
-; X32-AVX512DQ:       # BB#0:
-; X32-AVX512DQ-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X32-AVX512DQ-NEXT:    retl
+; X32-AVX512-LABEL: test_broadcast_2f64_8f64:
+; X32-AVX512:       # BB#0:
+; X32-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT:    retl
 ;
 ; X64-AVX-LABEL: test_broadcast_2f64_8f64:
 ; X64-AVX:       # BB#0:
@@ -62,20 +50,10 @@ define <8 x double> @test_broadcast_2f64
 ; X64-AVX-NEXT:    vmovdqa %ymm0, %ymm1
 ; X64-AVX-NEXT:    retq
 ;
-; X64-AVX512F-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512F:       # BB#0:
-; X64-AVX512F-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512BW:       # BB#0:
-; X64-AVX512BW-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BW-NEXT:    retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_2f64_8f64:
-; X64-AVX512DQ:       # BB#0:
-; X64-AVX512DQ-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X64-AVX512DQ-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_2f64_8f64:
+; X64-AVX512:       # BB#0:
+; X64-AVX512-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    retq
  %1 = load <2 x double>, <2 x double> *%p
  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x double> %2
@@ -152,23 +130,11 @@ define <8 x i64> @test_broadcast_2i64_8i
 ; X32-AVX2-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX2-NEXT:    retl
 ;
-; X32-AVX512F-LABEL: test_broadcast_2i64_8i64:
-; X32-AVX512F:       # BB#0:
-; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT:    retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_2i64_8i64:
-; X32-AVX512BW:       # BB#0:
-; X32-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X32-AVX512BW-NEXT:    retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
-; X32-AVX512DQ:       # BB#0:
-; X32-AVX512DQ-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X32-AVX512DQ-NEXT:    retl
+; X32-AVX512-LABEL: test_broadcast_2i64_8i64:
+; X32-AVX512:       # BB#0:
+; X32-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT:    retl
 ;
 ; X64-AVX1-LABEL: test_broadcast_2i64_8i64:
 ; X64-AVX1:       # BB#0:
@@ -182,20 +148,10 @@ define <8 x i64> @test_broadcast_2i64_8i
 ; X64-AVX2-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX2-NEXT:    retq
 ;
-; X64-AVX512F-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512F:       # BB#0:
-; X64-AVX512F-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512BW:       # BB#0:
-; X64-AVX512BW-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
-; X64-AVX512BW-NEXT:    retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_2i64_8i64:
-; X64-AVX512DQ:       # BB#0:
-; X64-AVX512DQ-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 = mem[0,1,0,1,0,1,0,1]
-; X64-AVX512DQ-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_2i64_8i64:
+; X64-AVX512:       # BB#0:
+; X64-AVX512-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    retq
  %1 = load <2 x i64>, <2 x i64> *%p
  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i64> %2
@@ -283,23 +239,11 @@ define <16 x float> @test_broadcast_8f32
 ; X32-AVX-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX-NEXT:    retl
 ;
-; X32-AVX512F-LABEL: test_broadcast_8f32_16f32:
-; X32-AVX512F:       # BB#0:
-; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT:    retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_8f32_16f32:
-; X32-AVX512BW:       # BB#0:
-; X32-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512BW-NEXT:    retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_8f32_16f32:
-; X32-AVX512DQ:       # BB#0:
-; X32-AVX512DQ-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X32-AVX512DQ-NEXT:    retl
+; X32-AVX512-LABEL: test_broadcast_8f32_16f32:
+; X32-AVX512:       # BB#0:
+; X32-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT:    retl
 ;
 ; X64-AVX-LABEL: test_broadcast_8f32_16f32:
 ; X64-AVX:       # BB#0:
@@ -307,20 +251,10 @@ define <16 x float> @test_broadcast_8f32
 ; X64-AVX-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX-NEXT:    retq
 ;
-; X64-AVX512F-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512F:       # BB#0:
-; X64-AVX512F-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512BW:       # BB#0:
-; X64-AVX512BW-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BW-NEXT:    retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_8f32_16f32:
-; X64-AVX512DQ:       # BB#0:
-; X64-AVX512DQ-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X64-AVX512DQ-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_8f32_16f32:
+; X64-AVX512:       # BB#0:
+; X64-AVX512-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    retq
  %1 = load <8 x float>, <8 x float> *%p
  %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x float> %2
@@ -403,23 +337,11 @@ define <16 x i32> @test_broadcast_8i32_1
 ; X32-AVX-NEXT:    vmovaps %ymm0, %ymm1
 ; X32-AVX-NEXT:    retl
 ;
-; X32-AVX512F-LABEL: test_broadcast_8i32_16i32:
-; X32-AVX512F:       # BB#0:
-; X32-AVX512F-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512F-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512F-NEXT:    retl
-;
-; X32-AVX512BW-LABEL: test_broadcast_8i32_16i32:
-; X32-AVX512BW:       # BB#0:
-; X32-AVX512BW-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512BW-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X32-AVX512BW-NEXT:    retl
-;
-; X32-AVX512DQ-LABEL: test_broadcast_8i32_16i32:
-; X32-AVX512DQ:       # BB#0:
-; X32-AVX512DQ-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-AVX512DQ-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X32-AVX512DQ-NEXT:    retl
+; X32-AVX512-LABEL: test_broadcast_8i32_16i32:
+; X32-AVX512:       # BB#0:
+; X32-AVX512-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X32-AVX512-NEXT:    retl
 ;
 ; X64-AVX-LABEL: test_broadcast_8i32_16i32:
 ; X64-AVX:       # BB#0:
@@ -427,20 +349,10 @@ define <16 x i32> @test_broadcast_8i32_1
 ; X64-AVX-NEXT:    vmovaps %ymm0, %ymm1
 ; X64-AVX-NEXT:    retq
 ;
-; X64-AVX512F-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512F:       # BB#0:
-; X64-AVX512F-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512F-NEXT:    retq
-;
-; X64-AVX512BW-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512BW:       # BB#0:
-; X64-AVX512BW-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
-; X64-AVX512BW-NEXT:    retq
-;
-; X64-AVX512DQ-LABEL: test_broadcast_8i32_16i32:
-; X64-AVX512DQ:       # BB#0:
-; X64-AVX512DQ-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
-; X64-AVX512DQ-NEXT:    retq
+; X64-AVX512-LABEL: test_broadcast_8i32_16i32:
+; X64-AVX512:       # BB#0:
+; X64-AVX512-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
+; X64-AVX512-NEXT:    retq
  %1 = load <8 x i32>, <8 x i32> *%p
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i32> %2




More information about the llvm-commits mailing list