[llvm] r292475 - [AVX-512] Add test cases that show where we are using two subvector inserts to broadcast a 128-bit subvector into a 512-bit vector. We'd be better off using something like SHUFF32X4.

Wed Jan 18 23:37:46 PST 2017

Author: ctopper
Date: Thu Jan 19 01:37:45 2017
New Revision: 292475

URL: http://llvm.org/viewvc/llvm-project?rev=292475&view=rev
Log:
[AVX-512] Add test cases that show where we are using two subvector inserts to broadcast a 128-bit subvector into a 512-bit vector. We'd be better off using something like SHUFF32X4.

If the subvector comes from a load, we convert to SUBV_BROADCAST and use a broadcast instruction. But if there is no load, we keep the two inserts. I think we should create the SUBV_BROADCAST even without the load and let isel use the fallback patterns that are used when the load can't be folded. This would use SHUFF32X4 or a similar instruction for the 128-bit into 512-bit case, and a single insert for the 128-bit into 256-bit or 256-bit into 512-bit cases.

This should be fixed so that the subvector broadcast intrinsics can be replaced with native IR, since some of those intrinsics currently lower directly to SHUFF32X4.

Modified:
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll?rev=292475&r1=292474&r2=292475&view=diff
==============================================================================

--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v16.ll Thu Jan 19 01:37:45 2017
@@ -548,3 +548,25 @@ define <16 x i32> @mask_shuffle_v16i32_0
   %res = select <16 x i1> %mask.cast, <16 x i32> %shuffle, <16 x i32> %passthru
   ret <16 x i32> %res
 }
+
+define <16 x i32> @mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x i32> %a) {
+; ALL-LABEL: mask_shuffle_v4i32_v16i32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; ALL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinserti32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %res = shufflevector <4 x i32> %a, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i32> %res
+}
+
+define <16 x float> @mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03(<4 x float> %a) {
+; ALL-LABEL: mask_shuffle_v4f32_v16f32_00_01_02_03_00_01_02_03_00_01_02_03_00_01_02_03:
+; ALL:       # BB#0:
+; ALL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; ALL-NEXT:    vinsertf32x8 $1, %ymm0, %zmm0, %zmm0
+; ALL-NEXT:    retq
+  %res = shufflevector <4 x float> %a, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x float> %res
+}

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll?rev=292475&r1=292474&r2=292475&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-512-v8.ll Thu Jan 19 01:37:45 2017
@@ -2571,3 +2571,39 @@ define <8 x i64> @shuffle_v8i64_01234589
   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
   ret <8 x i64> %shuffle
 }
+
+define <8 x i64> @shuffle_v2i64_v8i64_01010101(<2 x i64> %a) {
+; AVX512F-LABEL: shuffle_v2i64_v8i64_01010101:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v2i64_v8i64_01010101:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-32-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
+  %shuffle = shufflevector <2 x i64> %a, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <8 x i64> %shuffle
+}
+
+define <8 x double> @shuffle_v2f64_v8f64_01010101(<2 x double> %a) {
+; AVX512F-LABEL: shuffle_v2f64_v8f64_01010101:
+; AVX512F:       # BB#0:
+; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-NEXT:    retq
+;
+; AVX512F-32-LABEL: shuffle_v2f64_v8f64_01010101:
+; AVX512F-32:       # BB#0:
+; AVX512F-32-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512F-32-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; AVX512F-32-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0
+; AVX512F-32-NEXT:    retl
+  %shuffle = shufflevector <2 x double> %a, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
+  ret <8 x double> %shuffle
+}