[llvm] r313504 - [X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles with SSE1 only.

Sun Sep 17 15:36:41 PDT 2017

Author: ctopper
Date: Sun Sep 17 15:36:41 2017
New Revision: 313504

URL: http://llvm.org/viewvc/llvm-project?rev=313504&view=rev
Log:
[X86] Teach shuffle lowering to use MOVLHPS/MOVHLPS for lowering v4f32 unary shuffles with SSE1 only.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=313504&r1=313503&r2=313504&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 17 15:36:41 2017
@@ -10725,6 +10725,15 @@ static SDValue lowerV4F32VectorShuffle(c
                          getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
     }
 
+    // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid
+    // in SSE1 because otherwise they are widened to v2f64 and never get here.
+    if (!Subtarget.hasSSE2()) {
+      if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 0, 1}))
+        return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
+      if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 2, 3}))
+        return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
+    }
+
     // Otherwise, use a straight shuffle of a single input vector. We pass the
     // input vector to both operands to simulate this with a SHUFPS.
     return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
@@ -10757,11 +10766,14 @@ static SDValue lowerV4F32VectorShuffle(c
         return BlendPerm;
   }
 
-  // Use low/high mov instructions.
-  if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
-    return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
-  if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
-    return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+  // Use low/high mov instructions. These are only valid in SSE1 because
+  // otherwise they are widened to v2f64 and never get here.
+  if (!Subtarget.hasSSE2()) {
+    if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
+      return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
+    if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
+      return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+  }
 
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V =

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll?rev=313504&r1=313503&r2=313504&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-sse1.ll Sun Sep 17 15:36:41 2017
@@ -112,7 +112,7 @@ define <4 x float> @shuffle_v4f32_0145(<
 define <4 x float> @shuffle_v4f32_0101(<4 x float> %a, <4 x float> %b) {
 ; SSE1-LABEL: shuffle_v4f32_0101:
 ; SSE1:       # BB#0:
-; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; SSE1-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
 ; SSE1-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   ret <4 x float> %shuffle
@@ -121,7 +121,7 @@ define <4 x float> @shuffle_v4f32_0101(<
 define <4 x float> @shuffle_v4f32_2323(<4 x float> %a, <4 x float> %b) {
 ; SSE1-LABEL: shuffle_v4f32_2323:
 ; SSE1:       # BB#0:
-; SSE1-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; SSE1-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
 ; SSE1-NEXT:    retq
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
   ret <4 x float> %shuffle