[llvm] r175619 - I optimized the following patterns:
Muhammad Tauqir Ahmad
muhammad.t.ahmad at intel.com
Mon Feb 25 09:01:27 PST 2013
Hi Elena,
I have another question:
For sext 4xi8 -> 4xi64, the sequence pasted below is produced on
-mcpu=corei7-avx.
Is there a reason the sequence vpmovzxbd+vpslld+vpsrad is produced
instead of just a vpmovsxbd? Maybe I am missing something.
vpmovzxbd (%rax), %xmm0
vpslld $24, %xmm0, %xmm0
vpsrad $24, %xmm0, %xmm1
vpmovsxdq %xmm1, %xmm0
vmovhlps %xmm1, %xmm1, %xmm1 # xmm1 = xmm1[1,1]
vpmovsxdq %xmm1, %xmm1
vinsertf128 $1, %xmm1, %ymm0, %ymm0
vmovaps %ymm0, (%rax)
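For what it's worth, the vpslld $24 / vpsrad $24 pair is the usual
shift trick for sign_extend_inreg of an i8 inside an i32 lane. A
scalar C++ analogue, my own sketch rather than anything from the
patch:

  #include <cassert>
  #include <cstdint>

  // Sign-extend the low 8 bits of a 32-bit lane: shift the byte up
  // to the top, then arithmetic-shift it back down so the sign bit
  // smears across the upper 24 bits.
  int32_t sext_in_reg_i8(int32_t lane) {
    return (int32_t)((uint32_t)lane << 24) >> 24;
  }

  int main() {
    assert(sext_in_reg_i8(0xFF) == -1);  // 0xFF is -1 as an i8
    assert(sext_in_reg_i8(0x7F) == 127); // positive bytes unchanged
    return 0;
  }

That explains the shifts themselves; the question above is why the
leading zero extend is not folded into a single vpmovsxbd.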
(sorry for two emails, forgot to reply-all)
- Muhammad Tauqir
On Wed, Feb 20, 2013 at 7:42 AM, Elena Demikhovsky
<elena.demikhovsky at intel.com> wrote:
> Author: delena
> Date: Wed Feb 20 06:42:54 2013
> New Revision: 175619
>
> URL: http://llvm.org/viewvc/llvm-project?rev=175619&view=rev
> Log:
> I optimized the following patterns:
> sext <4 x i1> to <4 x i64>
> sext <4 x i8> to <4 x i64>
> sext <4 x i16> to <4 x i64>
>
> I'm running a combine on SIGN_EXTEND_INREG and rewriting it back into SEXT patterns:
> (sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) -> (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
>
> The sext_in_reg (v4i32 x) may be lowered to shl+sar operations.
> There is no vector "sar" on 64-bit elements, so sext_in_reg (v4i64 x) has no vector lowering.
>
> I also added the costs of these operations to the AVX cost table.
>
> Modified:
> llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> llvm/trunk/test/Analysis/CostModel/X86/cast.ll
> llvm/trunk/test/CodeGen/X86/avx-sext.ll
>
> Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed Feb 20 06:42:54 2013
> @@ -1323,6 +1323,7 @@ X86TargetLowering::X86TargetLowering(X86
> setTargetDAGCombine(ISD::ZERO_EXTEND);
> setTargetDAGCombine(ISD::ANY_EXTEND);
> setTargetDAGCombine(ISD::SIGN_EXTEND);
> + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
> setTargetDAGCombine(ISD::TRUNCATE);
> setTargetDAGCombine(ISD::SINT_TO_FP);
> setTargetDAGCombine(ISD::SETCC);
> @@ -17076,6 +17077,41 @@ static SDValue PerformVZEXT_MOVLCombine(
> return SDValue();
> }
>
> +static SDValue PerformSIGN_EXTEND_INREGCombine(SDNode *N, SelectionDAG &DAG,
> + const X86Subtarget *Subtarget) {
> + EVT VT = N->getValueType(0);
> + if (!VT.isVector())
> + return SDValue();
> +
> + SDValue N0 = N->getOperand(0);
> + SDValue N1 = N->getOperand(1);
> + EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
> + DebugLoc dl = N->getDebugLoc();
> +
> + // SIGN_EXTEND_INREG to v4i64 is an expensive operation on both
> + // SSE and AVX2 since there is no arithmetic shift right
> + // operation on a vector with 64-bit elements.
> + //(sext_in_reg (v4i64 anyext (v4i32 x )), ExtraVT) ->
> + // (v4i64 sext (v4i32 sext_in_reg (v4i32 x , ExtraVT)))
> + if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
> + N0.getOpcode() == ISD::SIGN_EXTEND)) {
> + SDValue N00 = N0.getOperand(0);
> +
> + // On AVX2 an EXTLOAD has a better solution:
> + // it may be replaced with an X86ISD::VSEXT node.
> + if (N00.getOpcode() == ISD::LOAD && Subtarget->hasInt256())
> + if (!ISD::isNormalLoad(N00.getNode()))
> + return SDValue();
> +
> + if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
> + SDValue Tmp = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32,
> + N00, N1);
> + return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
> + }
> + }
> + return SDValue();
> +}
> +
> static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
> TargetLowering::DAGCombinerInfo &DCI,
> const X86Subtarget *Subtarget) {
> @@ -17468,6 +17504,7 @@ SDValue X86TargetLowering::PerformDAGCom
> case ISD::ANY_EXTEND:
> case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
> case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
> + case ISD::SIGN_EXTEND_INREG: return PerformSIGN_EXTEND_INREGCombine(N, DAG, Subtarget);
> case ISD::TRUNCATE: return PerformTruncateCombine(N, DAG,DCI,Subtarget);
> case ISD::SETCC: return PerformISDSETCCCombine(N, DAG);
> case X86ISD::SETCC: return PerformSETCCCombine(N, DAG, DCI, Subtarget);
>
> Modified: llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp (original)
> +++ llvm/trunk/lib/Target/X86/X86TargetTransformInfo.cpp Wed Feb 20 06:42:54 2013
> @@ -232,6 +232,9 @@ unsigned X86TTI::getCastInstrCost(unsign
> { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 },
> { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 },
> { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 8 },
> + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 8 },
> { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 },
> };
>
>
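The three new rows follow the existing cost-table pattern: a static
array of {opcode, dst type, src type, cost} entries searched
linearly, falling back to the generic estimate on a miss. A sketch
of that idea in isolation, with illustrative names rather than
LLVM's actual helpers:

  #include <cstddef>

  struct CastCostEntry { int ISD; int Dst; int Src; unsigned Cost; };

  // Returns the index of the matching entry, or -1 when absent, in
  // which case the caller falls back to the generic cost.
  int findCastCost(const CastCostEntry *Tbl, size_t Size,
                   int ISD, int Dst, int Src) {
    for (size_t I = 0; I != Size; ++I)
      if (Tbl[I].ISD == ISD && Tbl[I].Dst == Dst && Tbl[I].Src == Src)
        return (int)I;
    return -1;
  }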
> Modified: llvm/trunk/test/Analysis/CostModel/X86/cast.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Analysis/CostModel/X86/cast.ll?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/test/Analysis/CostModel/X86/cast.ll (original)
> +++ llvm/trunk/test/Analysis/CostModel/X86/cast.ll Wed Feb 20 06:42:54 2013
> @@ -44,6 +44,10 @@ define i32 @zext_sext(<8 x i1> %in) {
> %B = zext <8 x i16> undef to <8 x i32>
> ;CHECK: cost of 1 {{.*}} sext
> %C = sext <4 x i32> undef to <4 x i64>
> + ;CHECK: cost of 8 {{.*}} sext
> + %C1 = sext <4 x i8> undef to <4 x i64>
> + ;CHECK: cost of 8 {{.*}} sext
> + %C2 = sext <4 x i16> undef to <4 x i64>
>
> ;CHECK: cost of 1 {{.*}} zext
> %D = zext <4 x i32> undef to <4 x i64>
> @@ -59,7 +63,7 @@ define i32 @zext_sext(<8 x i1> %in) {
> ret i32 undef
> }
>
> -define i32 @masks(<8 x i1> %in) {
> +define i32 @masks8(<8 x i1> %in) {
> ;CHECK: cost of 6 {{.*}} zext
> %Z = zext <8 x i1> %in to <8 x i32>
> ;CHECK: cost of 9 {{.*}} sext
> @@ -67,3 +71,9 @@ define i32 @masks(<8 x i1> %in) {
> ret i32 undef
> }
>
> +define i32 @masks4(<4 x i1> %in) {
> + ;CHECK: cost of 8 {{.*}} sext
> + %S = sext <4 x i1> %in to <4 x i64>
> + ret i32 undef
> +}
> +
>
> Modified: llvm/trunk/test/CodeGen/X86/avx-sext.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx-sext.ll?rev=175619&r1=175618&r2=175619&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/X86/avx-sext.ll (original)
> +++ llvm/trunk/test/CodeGen/X86/avx-sext.ll Wed Feb 20 06:42:54 2013
> @@ -142,3 +142,26 @@ define <8 x i16> @load_sext_test6(<8 x i
> %Y = sext <8 x i8> %X to <8 x i16>
> ret <8 x i16>%Y
> }
> +
> +; AVX: sext_4i1_to_4i64
> +; AVX: vpslld $31
> +; AVX: vpsrad $31
> +; AVX: vpmovsxdq
> +; AVX: vpmovsxdq
> +; AVX: ret
> +define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
> + %extmask = sext <4 x i1> %mask to <4 x i64>
> + ret <4 x i64> %extmask
> +}
> +
> +; AVX: sext_4i8_to_4i64
> +; AVX: vpslld $24
> +; AVX: vpsrad $24
> +; AVX: vpmovsxdq
> +; AVX: vpmovsxdq
> +; AVX: ret
> +define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
> + %extmask = sext <4 x i8> %mask to <4 x i64>
> + ret <4 x i64> %extmask
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits