[llvm] r289407 - [X86][SSE] Add support for combining target shuffles to SHUFPD.

Sun Dec 11 13:26:25 PST 2016

Author: rksimon
Date: Sun Dec 11 15:26:25 2016
New Revision: 289407

URL: http://llvm.org/viewvc/llvm-project?rev=289407&view=rev
Log:
[X86][SSE] Add support for combining target shuffles to SHUFPD.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=289407&r1=289406&r2=289407&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Dec 11 15:26:25 2016
@@ -11867,35 +11867,52 @@ static SDValue lowerShuffleAsRepeatedMas
                               SubLaneMask);
 }
 
-static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
-                                            ArrayRef<int> Mask, SDValue V1,
-                                            SDValue V2, SelectionDAG &DAG) {
+static bool matchVectorShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
+                                         unsigned &ShuffleImm,
+                                         ArrayRef<int> Mask) {
+  int NumElts = VT.getVectorNumElements();
+  assert(VT.getScalarType() == MVT::f64 &&
+         (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
+         "Unexpected data type for VSHUFPD");
 
   // Mask for V8F64: 0/1,  8/9,  2/3,  10/11, 4/5, ..
   // Mask for V4F64; 0/1,  4/5,  2/3,  6/7..
-  assert(VT.getScalarSizeInBits() == 64 && "Unexpected data type for VSHUFPD");
-  int NumElts = VT.getVectorNumElements();
+  ShuffleImm = 0;
   bool ShufpdMask = true;
   bool CommutableMask = true;
-  unsigned Immediate = 0;
   for (int i = 0; i < NumElts; ++i) {
-    if (Mask[i] < 0)
+    if (Mask[i] == SM_SentinelUndef)
       continue;
+    if (Mask[i] < 0)
+      return false;
     int Val = (i & 6) + NumElts * (i & 1);
-    int CommutVal = (i & 0xe) + NumElts * ((i & 1)^1);
-    if (Mask[i] < Val ||  Mask[i] > Val + 1)
+    int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
+    if (Mask[i] < Val || Mask[i] > Val + 1)
       ShufpdMask = false;
-    if (Mask[i] < CommutVal ||  Mask[i] > CommutVal + 1)
+    if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
       CommutableMask = false;
-    Immediate |= (Mask[i] % 2) << i;
+    ShuffleImm |= (Mask[i] % 2) << i;
   }
+
   if (ShufpdMask)
-    return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
-                       DAG.getConstant(Immediate, DL, MVT::i8));
-  if (CommutableMask)
-    return DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
-                       DAG.getConstant(Immediate, DL, MVT::i8));
-  return SDValue();
+    return true;
+  if (CommutableMask) {
+    std::swap(V1, V2);
+    return true;
+  }
+
+  return false;
+}
+
+static SDValue lowerVectorShuffleWithSHUFPD(const SDLoc &DL, MVT VT,
+                                            ArrayRef<int> Mask, SDValue V1,
+                                            SDValue V2, SelectionDAG &DAG) {
+  unsigned Immediate = 0;
+  if (!matchVectorShuffleWithSHUFPD(VT, V1, V2, Immediate, Mask))
+    return SDValue();
+
+  return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
+                     DAG.getConstant(Immediate, DL, MVT::i8));
 }
 
 static SDValue lowerVectorShuffleWithPERMV(const SDLoc &DL, MVT VT,
@@ -26198,6 +26215,17 @@ static bool matchBinaryPermuteVectorShuf
       return true;
     }
   }
+
+  // Attempt to combine to SHUFPD.
+  if ((MaskVT == MVT::v2f64 && Subtarget.hasSSE2()) ||
+      (MaskVT == MVT::v4f64 && Subtarget.hasAVX()) ||
+      (MaskVT == MVT::v8f64 && Subtarget.hasAVX512())) {
+    if (matchVectorShuffleWithSHUFPD(MaskVT, V1, V2, PermuteImm, Mask)) {
+      Shuffle = X86ISD::SHUFP;
+      ShuffleVT = MaskVT;
+      return true;
+    }
+  }
 
   return false;
 }

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll?rev=289407&r1=289406&r2=289407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-256-v8.ll Sun Dec 11 15:26:25 2016
@@ -687,9 +687,8 @@ define <8 x float> @shuffle_v8f32_f51123
 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
-; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
-; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
+; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
 ; AVX1-NEXT:    retq
 ;

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll?rev=289407&r1=289406&r2=289407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-avx512bw.ll Sun Dec 11 15:26:25 2016
@@ -858,14 +858,12 @@ define <8 x double> @combine_vpermi2var_
 define <8 x double> @combine_vpermi2var_8f64_as_shufpd(<8 x double> %x0, <8 x double> %x1) {
 ; X32-LABEL: combine_vpermi2var_8f64_as_shufpd:
 ; X32:       # BB#0:
-; X32-NEXT:    vmovapd {{.*#+}} zmm2 = [1,0,8,0,2,0,10,0,5,0,13,0,6,0,15,0]
-; X32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
+; X32-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermi2var_8f64_as_shufpd:
 ; X64:       # BB#0:
-; X64-NEXT:    vmovapd {{.*#+}} zmm2 = [1,8,2,10,5,13,6,15]
-; X64-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
+; X64-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[2],zmm1[2],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
 ; X64-NEXT:    retq
   %1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> <i64 1, i64 8, i64 2, i64 10, i64 5, i64 13, i64 6, i64 15>, <8 x double> %x1, i8 -1)
   ret <8 x double> %1

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=289407&r1=289406&r2=289407&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sun Dec 11 15:26:25 2016
@@ -150,12 +150,12 @@ define <4 x float> @combine_vpermil2ps_b
 define <2 x double> @combine_vpermil2pd_as_shufpd(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd_as_shufpd:
 ; X32:       # BB#0:
-; X32-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; X32-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermil2pd_as_shufpd:
 ; X64:       # BB#0:
-; X64-NEXT:    vpermil2pd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
+; X64-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
 ; X64-NEXT:    retq
   %res0 = call <2 x double> @llvm.x86.xop.vpermil2pd(<2 x double> %a0, <2 x double> %a1, <2 x i64> <i64 2, i64 4>, i8 0)
   ret <2 x double> %res0
@@ -164,12 +164,12 @@ define <2 x double> @combine_vpermil2pd_
 define <4 x double> @combine_vpermil2pd256_as_shufpd(<4 x double> %a0, <4 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd256_as_shufpd:
 ; X32:       # BB#0:
-; X32-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X32-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermil2pd256_as_shufpd:
 ; X64:       # BB#0:
-; X64-NEXT:    vpermil2pd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
+; X64-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
 ; X64-NEXT:    retq
   %res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a0, <4 x double> %a1, <4 x i64> <i64 0, i64 4, i64 2, i64 7>, i8 0)
   ret <4 x double> %res0