[llvm] r175619 - I optimized the following patterns:

Elena Demikhovsky elena.demikhovsky at intel.com
Wed Feb 20 04:42:54 PST 2013


Author: delena
Date: Wed Feb 20 06:42:54 2013
New Revision: 175619

URL: http://llvm.org/viewvc/llvm-project?rev=175619&view=rev
Log:
I optimized the following patterns:
 sext <4 x i1> to <4 x i64>
 sext <4 x i8> to <4 x i64>
 sext <4 x i16> to <4 x i64>
 
I run a DAG combine on SIGN_EXTEND_INREG and rewrite the SEXT pattern:
 (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))

The sext_in_reg of a v4i32 can be lowered to a shift-left/arithmetic-shift-right
(shl + sra) pair. There is no arithmetic shift right for vectors with 64-bit
elements, so sext_in_reg of a v4i64 has no efficient vector lowering.

I also added the cost of these operations to the AVX cost table.
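
For reference, here is a small standalone C++ sketch (not part of the patch;
the function and lane values are my own, purely illustrative) of what the
combine aims for: perform the in-register sign-extension per 32-bit lane with
a shift-left/arithmetic-shift-right pair, and only then widen each lane to
64 bits.

  // Illustration only: models the per-lane effect of
  //   vpslld $N, vpsrad $N   (sext_in_reg at v4i32)
  // followed by vpmovsxdq    (sext v4i32 -> v4i64).
  #include <cstdint>
  #include <cstdio>

  // Sign-extend the low FromBits bits of a 32-bit lane using shl + sra.
  static int32_t sext_in_reg_32(int32_t Lane, unsigned FromBits) {
    unsigned Shift = 32 - FromBits;                     // 24 for i8, 16 for i16
    return (int32_t)((uint32_t)Lane << Shift) >> Shift; // shl, then sra
  }

  int main() {
    // A <4 x i8> value already living in 32-bit lanes.
    int32_t Lanes[4] = {0x7F, 0x80, 0xFF, 0x01};
    for (int32_t L : Lanes) {
      int32_t Narrow = sext_in_reg_32(L, 8); // sext_in_reg (v4i32 x), i8
      int64_t Wide = (int64_t)Narrow;        // sext to 64 bits (vpmovsxdq)
      printf("%#x -> %lld\n", (unsigned)L, (long long)Wide);
    }
    return 0;
  }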

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
    llvm/trunk/test/Analysis/CostModel/X86/cast.ll
    llvm/trunk/test/CodeGen/X86/avx-sext.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=175619&r1=175618&r2=175619&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 20 06:42:54 2013
@@ -1323,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86
   setTargetDAGCombine(ISD::ZERO_EXTEND);
   setTargetDAGCombine(ISD::ANY_EXTEND);
   setTargetDAGCombine(ISD::SIGN_EXTEND);
+  setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
   setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::SINT_TO_FP);
   setTargetDAGCombine(ISD::SETCC);
@@ -17076,6 +17077,41 @@ static SDValue PerformVZEXT_MOVLCombine(
   return SDValue();
 }
 
+static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG, 
+                                               const X86Subtarget *Subtarget) {
+  EVT VT = N->getValueType(0);
+  if (!VT.isVector())
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
+  DebugLoc dl = N->getDebugLoc();
+
+  // SIGN_EXTEND_INREG to v4i64 is an expensive operation on both SSE and
+  // AVX2, since there is no arithmetic (sign-extending) shift right
+  // operation on a vector with 64-bit elements.
+  // (sext_in_reg (v4i64 anyext (v4i32 x)), ExtraVT) ->
+  //   (v4i64 sext (v4i32 sext_in_reg (v4i32 x, ExtraVT)))
+  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
+      N0.getOpcode() == ISD::SIGN_EXTEND)) {
+    SDValue N00 = N0.getOperand(0);
+
+    // An EXTLOAD has a better solution on AVX2: it may be replaced with an
+    // X86ISD::VSEXT node.
+    if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
+      if (!ISD::isNormalLoad(N00.getNode()))
+        return SDValue();
+
+    if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
+      SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
+                                N00, N1);
+      return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
+    }
+  }
+  return SDValue();
+}
+
 static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget *Subtarget) {
@@ -17468,6 +17504,7 @@ SDValue X86TargetLowering::PerformDAGCom
   case ISD::ANY_EXTEND:
   case ISD::ZERO_EXTEND:    return PerformZExtCombine(N, DAG, DCI, Subtarget);
   case ISD::SIGN_EXTEND:    return PerformSExtCombine(N, DAG, DCI, Subtarget);
+  case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
   case ISD::TRUNCATE:       return PerformTruncateCombine(N, DAG,DCI,Subtarget);
   case ISD::SETCC:          return PerformISDSETCCCombine(N, DAG);
   case X86ISD::SETCC:       return PerformSETCCCombine(N, DAG, DCI, Subtarget);

Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=175619&r1=175618&r2=175619&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Feb 20 06:42:54 2013
@@ -232,6 +232,9 @@ unsigned X86TTI::getCastInstrCost(unsign
     { ISD::FP_TO_SINT,  MVT::v4i8,  MVT::v4f32, 1 },
     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1,  6 },
     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1,  9 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1,  8 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8,  8 },
+    { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
     { ISD::TRUNCATE,    MVT::v8i32, MVT::v8i64, 3 },
   };
 

Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=175619&r1=175618&r2=175619&view=diff
==============================================================================
--- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
+++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Wed Feb 20 06:42:54 2013
@@ -44,6 +44,10 @@ define i32 @zext_sext(<8 x i1> %in) {
   %B = zext <8 x i16> undef to <8 x i32>
   ;CHECK: cost of 1 {{.*}} sext
   %C = sext <4 x i32> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C1 = sext <4 x i8> undef to <4 x i64>
+  ;CHECK: cost of 8 {{.*}} sext
+  %C2 = sext <4 x i16> undef to <4 x i64>
 
   ;CHECK: cost of 1 {{.*}} zext
   %D = zext <4 x i32> undef to <4 x i64>
@@ -59,7 +63,7 @@ define i32 @zext_sext(<8 x i1> %in) {
   ret i32 undef
 }
 
-define i32 @masks(<8 x i1> %in) {
+define i32 @masks8(<8 x i1> %in) {
   ;CHECK: cost of 6 {{.*}} zext
   %Z = zext <8 x i1> %in to <8 x i32>
   ;CHECK: cost of 9 {{.*}} sext
@@ -67,3 +71,9 @@ define i32 @masks(<8 x i1> %in) {
   ret i32 undef
 }
 
+define i32 @masks4(<4 x i1> %in) {
+  ;CHECK: cost of 8 {{.*}} sext
+  %S = sext <4 x i1> %in to <4 x i64>
+  ret i32 undef
+}
+

Modified: llvm/trunk/test/CodeGen/X86/avx-sext.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-sext.ll?rev=175619&r1=175618&r2=175619&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx-sext.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx-sext.ll Wed Feb 20 06:42:54 2013
@@ -142,3 +142,26 @@ define <8 x i16> @load_sext_test6(<8 x i
  %Y = sext <8 x i8> %X to <8 x i16>
  ret <8 x i16>%Y
 }
+
+; AVX: sext_4i1_to_4i64
+; AVX: vpslld  $31
+; AVX: vpsrad  $31
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
+  %extmask = sext <4 x i1> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
+; AVX: sext_4i8_to_4i64
+; AVX: vpslld  $24
+; AVX: vpsrad  $24
+; AVX: vpmovsxdq
+; AVX: vpmovsxdq
+; AVX: ret
+define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
+  %extmask = sext <4 x i8> %mask to <4 x i64>
+  ret <4 x i64> %extmask
+}
+
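
For readers without the generated assembly at hand, here is a hand-written
intrinsics rendering (illustrative only, not the compiler's actual output;
function and variable names are mine) of roughly the sequence the
sext_4i8_to_4i64 checks above expect: vpslld $24 + vpsrad $24 on the 32-bit
lanes, then two vpmovsxdq to widen the low and high lane pairs.

  #include <immintrin.h>
  #include <cstdint>
  #include <cstdio>

  static __m256i sext_4i8_to_4i64(__m128i X) {  // <4 x i8> held in 32-bit lanes
    __m128i Shl = _mm_slli_epi32(X, 24);        // vpslld $24
    __m128i Sar = _mm_srai_epi32(Shl, 24);      // vpsrad $24 (sext_in_reg)
    __m128i Lo  = _mm_cvtepi32_epi64(Sar);      // vpmovsxdq, lanes 0-1
    __m128i Hi  = _mm_cvtepi32_epi64(           // vpmovsxdq, lanes 2-3
        _mm_shuffle_epi32(Sar, _MM_SHUFFLE(3, 2, 3, 2)));
    return _mm256_insertf128_si256(_mm256_castsi128_si256(Lo), Hi, 1);
  }

  int main() {                                  // compile with -mavx
    int32_t In[4] = {0x7F, 0x80, 0xFF, 0x01};
    long long Out[4];
    __m256i R = sext_4i8_to_4i64(_mm_loadu_si128((const __m128i *)In));
    _mm256_storeu_si256((__m256i *)Out, R);
    printf("%lld %lld %lld %lld\n", Out[0], Out[1], Out[2], Out[3]);
    return 0;
  }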
