[llvm] e37cdbe - [X86][SSE] Add shufps+shufps test for fold through commutation
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 24 03:16:59 PST 2020
Author: Simon Pilgrim
Date: 2020-01-24T11:16:44Z
New Revision: e37cdbeeabfb17821b9ff5d2f42e9f440882dab8
URL: https://github.com/llvm/llvm-project/commit/e37cdbeeabfb17821b9ff5d2f42e9f440882dab8
DIFF: https://github.com/llvm/llvm-project/commit/e37cdbeeabfb17821b9ff5d2f42e9f440882dab8.diff
LOG: [X86][SSE] Add shufps+shufps test for fold through commutation
As mentioned on D73023, lowerShuffleWithSHUFPS should be able to commute the shufps inputs so that the load can be folded as the second argument, since it will then permute the shufps result anyway.
Added:
Modified:
llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
index 4b012e73f9cb..0462caf23fee 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v4.ll
@@ -2467,3 +2467,31 @@ define <4 x float> @shuffle_mem_v4f32_4523(<4 x float> %a, <4 x float>* %pb) {
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
ret <4 x float> %shuffle
}
+
+define <4 x float> @shuffle_mem_v4f32_0624(<4 x float> %a0, <4 x float>* %a1) {
+; SSE-LABEL: shuffle_mem_v4f32_0624:
+; SSE: # %bb.0:
+; SSE-NEXT: movaps (%rdi), %xmm1
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2],xmm0[2,0]
+; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,2,1,3]
+; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1OR2-LABEL: shuffle_mem_v4f32_0624:
+; AVX1OR2: # %bb.0:
+; AVX1OR2-NEXT: vmovaps (%rdi), %xmm1
+; AVX1OR2-NEXT: vshufps {{.*#+}} xmm0 = xmm1[0,2],xmm0[2,0]
+; AVX1OR2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
+; AVX1OR2-NEXT: retq
+;
+; AVX512VL-LABEL: shuffle_mem_v4f32_0624:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vmovaps (%rdi), %xmm2
+; AVX512VL-NEXT: vmovaps {{.*#+}} xmm1 = [0,6,2,4]
+; AVX512VL-NEXT: vpermi2ps %xmm0, %xmm2, %xmm1
+; AVX512VL-NEXT: vmovaps %xmm1, %xmm0
+; AVX512VL-NEXT: retq
+ %1 = load <4 x float>, <4 x float>* %a1
+ %2 = shufflevector <4 x float> %1, <4 x float> %a0, <4 x i32> <i32 0, i32 6, i32 2, i32 4>
+ ret <4 x float> %2
+}
More information about the llvm-commits mailing list