[llvm] r218208 - [x86] Teach the new vector shuffle lowering of v4f64 to prefer a direct

Sun Sep 21 04:17:55 PDT 2014

Author: chandlerc
Date: Sun Sep 21 06:17:55 2014
New Revision: 218208

URL: http://llvm.org/viewvc/llvm-project?rev=218208&view=rev
Log:
[x86] Teach the new vector shuffle lowering of v4f64 to prefer a direct
VBLENDPD over using VSHUFPD. While the 256-bit variant of VBLENDPD slows
down to the same speed as VSHUFPD on Sandy Bridge CPUs, it has twice the
throughput (i.e. half the reciprocal throughput) on Ivy Bridge CPUs, much
like it does everywhere for 128-bit vectors. There isn't a downside, so just eagerly use this
instruction when it suffices.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=218208&r1=218207&r2=218208&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Sep 21 06:17:55 2014
@@ -7237,6 +7237,7 @@ static SDValue lowerVectorShuffleAsBlend
   switch (VT.SimpleTy) {
   case MVT::v2f64:
   case MVT::v4f32:
+  case MVT::v4f64:
     return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
                        DAG.getConstant(BlendMask, MVT::i8));
 
@@ -9229,6 +9230,10 @@ static SDValue lowerV4F64VectorShuffle(S
   if (isShuffleEquivalent(Mask, 5, 1, 7, 3))
     return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4f64, V2, V1);
 
+  if (SDValue Blend =
+          lowerVectorShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, DAG))
+    return Blend;
+
   // Check if the blend happens to exactly fit that of SHUFPD.
   if (Mask[0] < 4 && (Mask[1] == -1 || Mask[1] >= 4) &&
       Mask[2] < 4 && (Mask[3] == -1 || Mask[3] >= 4)) {

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll?rev=218208&r1=218207&r2=218208&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v4.ll Sun Sep 21 06:17:55 2014
@@ -359,7 +359,7 @@ define <4 x double> @shuffle_v4f64_5163(
 define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
 ; ALL-LABEL: @shuffle_v4f64_0527
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshufpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
+; ALL-NEXT:    vblendpd {{.*}} # ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
   ret <4 x double> %shuffle
@@ -368,7 +368,7 @@ define <4 x double> @shuffle_v4f64_0527(
 define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
 ; ALL-LABEL: @shuffle_v4f64_4163
 ; ALL:       # BB#0:
-; ALL-NEXT:    vshufpd {{.*}} # ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
+; ALL-NEXT:    vblendpd {{.*}} # ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
 ; ALL-NEXT:    retq
   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
   ret <4 x double> %shuffle