[llvm] e37cdbe - [X86][SSE] Add shufps+shufps test for fold through commutation

Fri Jan 24 03:16:59 PST 2020

Author: Simon Pilgrim
Date: 2020-01-24T11:16:44Z
New Revision: e37cdbeeabfb17821b9ff5d2f42e9f440882dab8

URL: https://github.com/llvm/llvm-project/commit/e37cdbeeabfb17821b9ff5d2f42e9f440882dab8
DIFF: https://github.com/llvm/llvm-project/commit/e37cdbeeabfb17821b9ff5d2f42e9f440882dab8.diff

LOG: [X86][SSE] Add shufps+shufps test for fold through commutation

As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs to fold the second arg as it will then permute the shufps result anyway.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e73f9cb..0462caf23fee 100644

--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
   ret <4 x float> %shuffle
 }
+
+define  <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movaps (%rdi), %xmm1
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2:       # %bb.0:
+; AVX1OR2-NEXT:    vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT:    retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL:       # %bb.0:
+; AVX512VL-NEXT:    vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT:    vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT:    vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT:    vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT:    retq
+  %1 = load <4 x float>, <4 x float>* %a1
+  %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+  ret <4 x float> %2
+}