[llvm] r313504 - [X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles with SSE1 only.
Craig Topper via llvm-commits
llvm-commits at lists.llvm.org
Sun Sep 17 15:36:41 PDT 2017
Author: ctopper
Date: Sun Sep 17 15:36:41 2017
New Revision: 313504
URL: http://llvm.org/viewvc/llvm-project?rev=313504&view=rev
Log:
[X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles with SSE1 only.
Modified:
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=313504&r1=313503&r2=313504&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 17 15:36:41 2017
@@ -10725,6 +10725,15 @@ static SDValue lowerV4F32VectorShuffle(c
getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
}
+ // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
+ // in SSE1 because otherwise they are widened to v2f64 and never get here.
+ if (!Subtarget.hasSSE2()) {
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}))
+ return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3}))
+ return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
+ }
+
// Otherwise, use a straight shuffle of a single input vector. We pass the
// input vector to both operands to simulate this with a SHUFPS.
return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
@@ -10757,11 +10766,14 @@ static SDValue lowerV4F32VectorShuffle(c
return BlendPerm;
}
- // Use low/high mov instructions.
- if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
- return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
- if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
- return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+ // Use low/high mov instructions. These are only valid in SSE1 because
+ // otherwise they are widened to v2f64 and never get here.
+ if (!Subtarget.hasSSE2()) {
+ if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
+ return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
+ if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
+ return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+ }
// Use dedicated unpack instructions for masks that match their pattern.
if (SDValue V =
Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll?rev=313504&r1=313503&r2=313504&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll Sun Sep 17 15:36:41 2017
@@ -112,7 +112,7 @@ define <4 x float> @shuffle_v4f32_0145(<
define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_0101:
; SSE1: # BB#0:
-; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE1-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
ret <4 x float> %shuffle
@@ -121,7 +121,7 @@ define <4 x float> @shuffle_v4f32_0101(<
define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) {
; SSE1-LABEL: shuffle_v4f32_2323:
; SSE1: # BB#0:
-; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE1-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; SSE1-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
ret <4 x float> %shuffle
More information about the llvm-commits mailing list