[llvm] r271831 - [X86][XOP] Added VPERMIL2PD/VPERMIL2PS as a target shuffle type

Sun Jun 5 08:01:45 PDT 2016

Author: rksimon
Date: Sun Jun  5 10:01:45 2016
New Revision: 271831

URL: http://llvm.org/viewvc/llvm-project?rev=271831&view=rev
Log:
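[X86][XOP] Added VPERMIL2PD/VPERMIL2PS as a target shuffle type

X86ISD::VPERMIL2 nodes are now recognised as target shuffles and their masks are decoded when both the selector mask and the control immediate are constants, which lets the shuffle combiner fold them (see the updated identity/zero tests).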

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=271831&r1=271830&r2=271831&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Sun Jun  5 10:01:45 2016
@@ -3790,6 +3790,7 @@ static bool isTargetShuffle(unsigned Opc
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
   case X86ISD::VPERM2X128:
+  case X86ISD::VPERMIL2:
   case X86ISD::VPERMI:
   case X86ISD::VPPERM:
   case X86ISD::VPERMV:
@@ -4929,6 +4930,20 @@ static bool getTargetShuffleMask(SDNode
   case X86ISD::MOVLPS:
     // Not yet implemented
     return false;
+  case X86ISD::VPERMIL2: {
+    IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+    unsigned MaskEltSize = VT.getScalarSizeInBits();
+    SDValue MaskNode = N->getOperand(2);
+    SDValue CtrlNode = N->getOperand(3);
+    if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) {
+      unsigned CtrlImm = CtrlOp->getZExtValue();
+      if (auto *C = getTargetShuffleMaskConstant(MaskNode)) {
+        DecodeVPERMIL2PMask(C, CtrlImm, MaskEltSize, Mask);
+        break;
+      }
+    }
+    return false;
+  }
   case X86ISD::VPPERM: {
     IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
     SDValue MaskNode = N->getOperand(2);
@@ -30113,6 +30128,7 @@ SDValue X86TargetLowering::PerformDAGCom
   case X86ISD::MOVSD:
   case X86ISD::VPPERM:
   case X86ISD::VPERMV3:
+  case X86ISD::VPERMIL2:
   case X86ISD::VPERMILPI:
   case X86ISD::VPERMILPV:
   case X86ISD::VPERM2X128:

Modified: llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll?rev=271831&r1=271830&r2=271831&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vector-shuffle-combining-xop.ll Sun Jun  5 10:01:45 2016
@@ -26,9 +26,7 @@ define <2 x double> @combine_vpermil2pd_
 define <4 x double> @combine_vpermil2pd256_identity(<4 x double> %a0, <4 x double> %a1) {
 ; CHECK-LABEL: combine_vpermil2pd256_identity:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovapd {{.*#+}} ymm2 = [2,0,2,0]
-; CHECK-NEXT:    vpermil2pd $0, %ymm2, %ymm0, %ymm1, %ymm0
-; CHECK-NEXT:    vpermil2pd $0, %ymm2, %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %res0 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %a1, <4 x double> %a0, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
   %res1 = call <4 x double> @llvm.x86.xop.vpermil2pd.256(<4 x double> %res0, <4 x double> undef, <4 x i64> <i64 2, i64 0, i64 2, i64 0>, i8 0)
@@ -38,9 +36,7 @@ define <4 x double> @combine_vpermil2pd2
 define <4 x float> @combine_vpermil2ps_identity(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: combine_vpermil2ps_identity:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovaps {{.*#+}} xmm2 = [3,2,1,0]
-; CHECK-NEXT:    vpermil2ps $0, %xmm2, %xmm0, %xmm1, %xmm0
-; CHECK-NEXT:    vpermil2ps $0, %xmm2, %xmm0, %xmm0, %xmm0
+; CHECK-NEXT:    vmovaps %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a1, <4 x float> %a0, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
   %res1 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %res0, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>, i8 0)
@@ -50,19 +46,27 @@ define <4 x float> @combine_vpermil2ps_i
 define <8 x float> @combine_vpermil2ps256_identity(<8 x float> %a0, <8 x float> %a1) {
 ; CHECK-LABEL: combine_vpermil2ps256_identity:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vmovaps {{.*#+}} ymm2 = [3,2,1,0,1,0,3,2]
-; CHECK-NEXT:    vpermil2ps $0, %ymm2, %ymm0, %ymm1, %ymm0
-; CHECK-NEXT:    vpermil2ps $0, %ymm2, %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    vmovaps %ymm1, %ymm0
 ; CHECK-NEXT:    retq
   %res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
   %res1 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %res0, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2>, i8 0)
   ret <8 x float> %res1
 }
 
+define <8 x float> @combine_vpermil2ps256_zero(<8 x float> %a0, <8 x float> %a1) {
+; CHECK-LABEL: combine_vpermil2ps256_zero:
+; CHECK:       # BB#0:
+; CHECK-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; CHECK-NEXT:    retq
+  %res0 = call <8 x float> @llvm.x86.xop.vpermil2ps.256(<8 x float> %a1, <8 x float> %a0, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11>, i8 2)
+  ret <8 x float> %res0
+}
+
 define <4 x float> @combine_vpermil2ps_blend_with_zero(<4 x float> %a0, <4 x float> %a1) {
 ; CHECK-LABEL: combine_vpermil2ps_blend_with_zero:
 ; CHECK:       # BB#0:
-; CHECK-NEXT:    vpermil2ps {{.*#+}} xmm0 = zero,xmm0[1,2,3]
+; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
+; CHECK-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
 ; CHECK-NEXT:    retq
   %res0 = call <4 x float> @llvm.x86.xop.vpermil2ps(<4 x float> %a0, <4 x float> %a1, <4 x i32> <i32 8, i32 1, i32 2, i32 3>, i8 2)
   ret <4 x float> %res0