[llvm] r217761 - [x86] Teach the x86 DAG combiner to form UNPCKLPS and UNPCKHPS

Mon Sep 15 04:26:25 PDT 2014

Author: chandlerc
Date: Mon Sep 15 06:26:25 2014
New Revision: 217761

URL: http://llvm.org/viewvc/llvm-project?rev=217761&view=rev
Log:
[x86] Teach the x86 DAG combiner to form UNPCKLPS and UNPCKHPS
instructions from the relevant shuffle patterns.

This is the last tweak I'm aware of to generate essentially perfect
v4f32 and v2f64 shuffles with the new vector shuffle lowering up through
SSE4.1. I'm sure I've missed some and it'd be nice to check since v4f32
is amenable to exhaustive exploration, but this is all of the tricks I'm
aware of.

With AVX there is a new trick to use the VPERMILPS instruction, that's
coming up in a subsequent patch.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=217761&r1=217760&r2=217761&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Sep 15 06:26:25 2014
@@ -19413,6 +19413,20 @@ static bool combineX86ShuffleChain(SDVal
                     /*AddTo*/ true);
       return true;
     }
+    if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
+      bool Lo = Mask.equals(0, 0, 1, 1);
+      unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
+      MVT ShuffleVT = MVT::v4f32;
+      if (Depth == 1 && Root->getOpcode() == Shuffle)
+        return false; // Nothing to do!
+      Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
+      DCI.AddToWorklist(Op.getNode());
+      Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
+      DCI.AddToWorklist(Op.getNode());
+      DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
+                    /*AddTo*/ true);
+      return true;
+    }
   }
 
   // We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll?rev=217761&r1=217760&r2=217761&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-128-v4.ll Mon Sep 15 06:26:25 2014
@@ -119,6 +119,20 @@ define <4 x float> @shuffle_v4f32_3210(<
   %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
   ret <4 x float> %shuffle
 }
+define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: @shuffle_v4f32_0011
+; ALL:         unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
+  ret <4 x float> %shuffle
+}
+define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
+; ALL-LABEL: @shuffle_v4f32_2233
+; ALL:         unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3]
+; ALL-NEXT:    retq
+  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
+  ret <4 x float> %shuffle
+}
 define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
 ; SSE2-LABEL: @shuffle_v4f32_0022
 ; SSE2:         shufps {{.*}} # xmm0 = xmm0[0,0,2,2]