[llvm] 377e86d - [X86][AVX] Add tests showing combineCommutableSHUFP failure to handle v8f32 and v16f32 commutable shufps patterns

Sun Jan 26 06:36:56 PST 2020

Author: Simon Pilgrim
Date: 2020-01-26T14:36:24Z
New Revision: 377e86d12ebeee09916b98ce09b315343d82468e

URL: https://github.com/llvm/llvm-project/commit/377e86d12ebeee09916b98ce09b315343d82468e
DIFF: https://github.com/llvm/llvm-project/commit/377e86d12ebeee09916b98ce09b315343d82468e.diff

LOG: [X86][AVX] Add tests showing combineCommutableSHUFP failure to handle v8f32 and v16f32 commutable shufps patterns

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
    llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index f1372290fdc8..790a6798e149 100644

--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1305,6 +1305,31 @@ define <8 x float> @shuffle_v8f32_32107654_v4f32(<4 x float> %a, <4 x float> %b)
   ret <8 x float> %3
 }
 
+define <8 x float> @shuffle_mem_v8f32_8BA0CFE4(<8 x float> %a0, <8 x float>* %a1) {
+; AVX1OR2-LABEL: shuffle_mem_v8f32_8BA0CFE4:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %ymm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[2,0],ymm1[4,4],ymm0[6,4]
+; AVX1OR2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[2,0],ymm0[4,7],ymm1[6,4]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-SLOW-LABEL: shuffle_mem_v8f32_8BA0CFE4:
+; AVX512VL-SLOW:       # %bb.0:
+; AVX512VL-SLOW-NEXT:    vmovaps (%rdi), %ymm1
+; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[2,0],ymm1[4,4],ymm0[6,4]
+; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm1[2,0],ymm0[4,7],ymm1[6,4]
+; AVX512VL-SLOW-NEXT:    retq
+;
+; AVX512VL-FAST-LABEL: shuffle_mem_v8f32_8BA0CFE4:
+; AVX512VL-FAST:       # %bb.0:
+; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [0,3,2,8,4,7,6,12]
+; AVX512VL-FAST-NEXT:    vpermt2ps (%rdi), %ymm1, %ymm0
+; AVX512VL-FAST-NEXT:    retq
+  %1 = load <8 x float>, <8 x float>* %a1
+  %2 = shufflevector <8 x float> %1, <8 x float> %a0, <8 x i32> <i32 8, i32 11, i32 10, i32 0, i32 12, i32 15, i32 14, i32 4>
+  ret <8 x float> %2
+}
+
 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-LABEL: shuffle_v8i32_00000000:
 ; AVX1:       # %bb.0:

diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
index d047042419c9..14faa40b654b 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-512-v16.ll
@@ -271,6 +271,18 @@ define <16 x float> @shuffle_v16f32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_
   ret <16 x float> %d
 }
 
+define <16 x float> @shuffle_v16f32_load_08_11_10_00_12_15_14_04(<16 x float> %a0, <16 x float>* %a1) {
+; ALL-LABEL: shuffle_v16f32_load_08_11_10_00_12_15_14_04:
+; ALL:       # %bb.0:
+; ALL-NEXT:    vmovaps (%rdi), %zmm1
+; ALL-NEXT:    vshufps {{.*#+}} zmm1 = zmm1[0,0],zmm0[2,0],zmm1[4,4],zmm0[6,4],zmm1[8,8],zmm0[10,8],zmm1[12,12],zmm0[14,12]
+; ALL-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[0,3],zmm1[2,0],zmm0[4,7],zmm1[6,4],zmm0[8,11],zmm1[10,8],zmm0[12,15],zmm1[14,12]
+; ALL-NEXT:    retq
+  %1 = load <16 x float>, <16 x float>* %a1
+  %2 = shufflevector <16 x float> %1, <16 x float> %a0, <16 x i32> <i32 16, i32 19, i32 18, i32 0, i32 20, i32 23, i32 22, i32 4, i32 24, i32 27, i32 26, i32 8, i32 28, i32 31, i32 30, i32 12>
+  ret <16 x float> %2
+}
+
 define <16 x i32> @shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18(<16 x i32> %a, <16 x i32>* %b)  {
 ; ALL-LABEL: shuffle_v16i32_load_0f_1f_0e_16_0d_1d_04_1e_0b_1b_0a_1a_09_19_08_18:
 ; ALL:       # %bb.0: